pax_global_header00006660000000000000000000000064135336241630014520gustar00rootroot0000000000000052 comment=1a1e7411aed011ba757953c004380fa32cce0293 vecf64-0.9.0/000077500000000000000000000000001353362416300126235ustar00rootroot00000000000000vecf64-0.9.0/.gitignore000066400000000000000000000004331353362416300146130ustar00rootroot00000000000000# Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.test *.prof vendor vendor/* vecf64-0.9.0/.travis.yml000066400000000000000000000005021353362416300147310ustar00rootroot00000000000000sudo: false language: go branches: only: - master go: - 1.7.x - 1.8.x - 1.9.x - 1.10.x - tip env: global: - GOARCH=amd64 - TRAVISTEST=true go_import_path: gorgonia.org/vecf64 before_install: - go get github.com/mattn/goveralls script: - ./test.sh matrix: allow_failures: - go: tipvecf64-0.9.0/CONTRIBUTORS.md000066400000000000000000000004151353362416300151020ustar00rootroot00000000000000# Contributors # The list of contributors to this library is listed in alphabetical order, first by name, failing which (i.e. if a name was not provided), the Github username will be used. * Austin Clements (@aclements) * Naseer Dari (@ndari) * Xuanyi Chew (@chewxy)vecf64-0.9.0/Gopkg.lock000066400000000000000000000013541353362416300145470ustar00rootroot00000000000000# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 
[[projects]] name = "github.com/davecgh/go-spew" packages = ["spew"] revision = "346938d642f2ec3594ed81d874461961cd0faa76" version = "v1.1.0" [[projects]] name = "github.com/pmezard/go-difflib" packages = ["difflib"] revision = "792786c7400a136282c1664665ae0a8db921c6c2" version = "v1.0.0" [[projects]] name = "github.com/stretchr/testify" packages = ["assert"] revision = "69483b4bd14f5845b5a1e55bca19e954e827f1d0" version = "v1.1.4" [solve-meta] analyzer-name = "dep" analyzer-version = 1 inputs-digest = "b750880cdc8ce044e6f9bf3b331d8a392471c328107b8c3d42e3e11022d76858" solver-name = "gps-cdcl" solver-version = 1 vecf64-0.9.0/Gopkg.toml000066400000000000000000000001121353362416300145610ustar00rootroot00000000000000[[constraint]] name = "github.com/stretchr/testify" version = "1.1.4" vecf64-0.9.0/LICENSE000066400000000000000000000020541353362416300136310ustar00rootroot00000000000000MIT License Copyright (c) 2017 Xuanyi Chew Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
vecf64-0.9.0/README.md000066400000000000000000000042621353362416300141060ustar00rootroot00000000000000# vecf64 [![GoDoc](https://godoc.org/gorgonia.org/vecf64?status.svg)](https://godoc.org/gorgonia.org/vecf64) [![Build Status](https://travis-ci.org/gorgonia/vecf64.svg?branch=master)](https://travis-ci.org/gorgonia/vecf64) [![Coverage Status](https://coveralls.io/repos/github/gorgonia/vecf64/badge.svg?branch=master)](https://coveralls.io/github/gorgonia/vecf64?branch=master) Package vecf64 provides common functions and methods for slices of float64 # Installation `go get -u gorgonia.org/vecf64` This package uses the standard library only. For testing this package uses [testify/assert](https://github.com/stretchr/testify), which is licenced with a [MIT/BSD-like licence](https://github.com/stretchr/testify/blob/master/LICENSE) # Build Tags The point of this package is to provide operations that are accelerated by SIMD. However, this pakcage by default does not use SIMD. To use SIMD, build tags must be used. The supported build tags are `sse` and `avx`. Here's an example on how to use them: * [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions) - `go build -tags='sse' ... * [AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) - `go build -tags='avx' ... ### Why are there so many `a = a[:len(a)]` lines? This is mainly done to eliminate bounds checking in a loop. The idea is the bounds of the slice is checked early on, and if need be, panics early. Then if everything is normal, there won't be bounds checking while in the loop. To check for boundschecking and bounds check elimination (an amazing feature that landed in Go 1.7), compile your programs with `-gcflags='-d=ssa/check_bce/debug=1'`. # Contributing Contributions are welcome. The typical process works like this: 1. File an issue on the topic you want to contribute 2. Fork this repo 3. Add your contribution 4. Make a pull request 5. 
The pull request will be merged once tests pass, and code reviewed. 6. Add your name (if it hasn't already been added to CONTRIBUTORS.md) ## Pull Requests This package is very well tested. Please ensure tests are written if any new features are added. If bugs are fixed, please add the bugs to the tests as well. # Licence Package vecf64 is licenced under the MIT licence.vecf64-0.9.0/arith.go000066400000000000000000000074351353362416300142720ustar00rootroot00000000000000package vecf64 import "math" // Pow performs elementwise // a̅ ^ b̅ func Pow(a, b []float64) { b = b[:len(a)] for i, v := range a { switch b[i] { case 0: a[i] = float64(1) case 1: a[i] = v case 2: a[i] = v * v case 3: a[i] = v * v * v default: a[i] = math.Pow(v, b[i]) } } } func Mod(a, b []float64) { b = b[:len(a)] for i, v := range a { a[i] = math.Mod(v, b[i]) } } // Scale multiplies all values in the slice by the scalar. It performs elementwise // a̅ * s func Scale(a []float64, s float64) { for i, v := range a { a[i] = v * s } } // ScaleInv divides all values in the slice by the scalar. 
It performs elementwise // a̅ / s func ScaleInv(a []float64, s float64) { Scale(a, 1/s) } /// ScaleInvR divides all numbers in the slice by a scalar // s / a̅ func ScaleInvR(a []float64, s float64) { for i, v := range a { a[i] = s / v } } // Trans adds all the values in the slice by a scalar // a̅ + s func Trans(a []float64, s float64) { for i, v := range a { a[i] = v + s } } // TransInv subtracts all the values in the slice by a scalar // a̅ - s func TransInv(a []float64, s float64) { Trans(a, -s) } // TransInvR subtracts all the numbers in a slice from a scalar // s - a̅ func TransInvR(a []float64, s float64) { for i, v := range a { a[i] = s - v } } // PowOf performs elementwise // a̅ ^ s func PowOf(a []float64, s float64) { for i, v := range a { a[i] = math.Pow(v, s) } } // PowOfR performs elementwise // s ^ a̅ func PowOfR(a []float64, s float64) { for i, v := range a { a[i] = math.Pow(s, v) } } // Max takes two slices, a̅ + b̅, and compares them elementwise. The highest value is put into a̅. func Max(a, b []float64) { b = b[:len(a)] for i, v := range a { bv := b[i] if bv > v { a[i] = bv } } } // Min takes two slices, a̅ + b̅ and compares them elementwise. The lowest value is put into a̅. func Min(a, b []float64) { b = b[:len(a)] for i, v := range a { bv := b[i] if bv < v { a[i] = bv } } } /* REDUCTION RELATED */ // Sum sums a slice of float64 and returns a float64 func Sum(a []float64) float64 { return Reduce(add, float64(0), a...) } // MaxOf finds the max of a []float64. it panics if the slice is empty func MaxOf(a []float64) (retVal float64) { if len(a) < 1 { panic("Cannot find the max of an empty slice") } return Reduce(max, a[0], a[1:]...) } // MinOf finds the max of a []float64. it panics if the slice is empty func MinOf(a []float64) (retVal float64) { if len(a) < 1 { panic("Cannot find the min of an empty slice") } return Reduce(min, a[0], a[1:]...) 
} // Argmax returns the index of the min in a slice func Argmax(a []float64) int { var f float64 var max int var set bool for i, v := range a { if !set { f = v max = i set = true continue } // TODO: Maybe error instead of this? if math.IsNaN(v) || math.IsInf(v, 1) { max = i f = v break } if v > f { max = i f = v } } return max } // Argmin returns the index of the min in a slice func Argmin(a []float64) int { var f float64 var min int var set bool for i, v := range a { if !set { f = v min = i set = true continue } // TODO: Maybe error instead of this? if math.IsNaN(v) || math.IsInf(v, -1) { min = i f = v break } if v < f { min = i f = v } } return min } /* FUNCTION VARIABLES */ var ( add = func(a, b float64) float64 { return a + b } // sub = func(a, b float64) float64 { return a - b } // mul = func(a, b float64) float64 { return a * b } // div = func(a, b float64) float64 { return a / b } // mod = func(a, b float64) float64 { return math.Mod(a, b) } min = func(a, b float64) float64 { if a < b { return a } return b } max = func(a, b float64) float64 { if a > b { return a } return b } ) vecf64-0.9.0/arith_bench_test.go000066400000000000000000000044141353362416300164620ustar00rootroot00000000000000// +build !sse,!avx package vecf64 import ( "math" "testing" ) /* BENCHMARKS */ func _vanillaVecAdd(a, b []float64) { for i := range a { a[i] += b[i] } } func BenchmarkVecAdd(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { Add(x, y) } } func BenchmarkVanillaVecAdd(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { _vanillaVecAdd(x, y) } } func _vanillaVecSub(a, b []float64) { for i := range a { a[i] -= b[i] } } func BenchmarkVecSub(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { Sub(x, y) } } func BenchmarkVanillaVecSub(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; 
n++ { _vanillaVecSub(x, y) } } func _vanillaVecMul(a, b []float64) { for i := range a { a[i] *= b[i] } } func BenchmarkVecMul(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { Mul(x, y) } } func BenchmarkVanillaVecMul(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { _vanillaVecMul(x, y) } } func _vanillaVecDiv(a, b []float64) { for i := range a { a[i] /= b[i] } } func BenchmarkVecDiv(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { Div(x, y) } } func BenchmarkVanillaVecDiv(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { _vanillaVecDiv(x, y) } } func _vanillaVecSqrt(a []float64) { for i, v := range a { a[i] = math.Sqrt(v) } } func BenchmarkVecSqrt(b *testing.B) { x := Range(0, niceprime) for n := 0; n < b.N; n++ { Sqrt(x) } } func BenchmarkVanillaVecSqrt(b *testing.B) { x := Range(0, niceprime) for n := 0; n < b.N; n++ { _vanillaVecSqrt(x) } } func _vanillaVecInverseSqrt(a []float64) { for i, v := range a { a[i] = 1.0 / math.Sqrt(v) } } func BenchmarkVecInvSqrt(b *testing.B) { x := Range(0, niceprime) for n := 0; n < b.N; n++ { InvSqrt(x) } } func BenchmarkVanillaVecInvSqrt(b *testing.B) { x := Range(0, niceprime) for n := 0; n < b.N; n++ { _vanillaVecInverseSqrt(x) } } vecf64-0.9.0/arith_test.go000066400000000000000000000155471353362416300153340ustar00rootroot00000000000000package vecf64 import ( "math" "testing" "unsafe" "github.com/stretchr/testify/assert" ) // 1049 is actually a prime, so it cannot be divisible by any other number // This is a good way to test that the remainder part of the Add/Sub/Mul/Div/Pow works const ( // niceprime = 37 // niceprime = 1049 niceprime = 597929 // niceprime = 1299827 // because sometimes I feel like being an idiot ) func TestAdd(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime) correct := Range(0, 
niceprime) for i := range correct { correct[i] = correct[i] + correct[i] } Add(a, a) assert.Equal(correct, a) b := Range(niceprime, 2*niceprime) for i := range correct { correct[i] = a[i] + b[i] } Add(a, b) assert.Equal(correct, a) /* Weird Corner Cases*/ for i := 1; i < 65; i++ { a = Range(0, i) var testAlign bool addr := &a[0] u := uint(uintptr(unsafe.Pointer(addr))) if u&uint(32) != 0 { testAlign = true } if testAlign { b = Range(i, 2*i) correct = make([]float64, i) for j := range correct { correct[j] = b[j] + a[j] } Add(a, b) assert.Equal(correct, a) } } } func TestSub(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime) correct := make([]float64, niceprime) Sub(a, a) assert.Equal(correct, a) b := Range(niceprime, 2*niceprime) for i := range correct { correct[i] = a[i] - b[i] } Sub(a, b) assert.Equal(correct, a) /* Weird Corner Cases*/ for i := 1; i < 65; i++ { a = Range(0, i) var testAlign bool addr := &a[0] u := uint(uintptr(unsafe.Pointer(addr))) if u&uint(32) != 0 { testAlign = true } if testAlign { b = Range(i, 2*i) correct = make([]float64, i) for j := range correct { correct[j] = a[j] - b[j] } Sub(a, b) assert.Equal(correct, a) } } } func TestMul(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime) correct := Range(0, niceprime) for i := range correct { correct[i] = correct[i] * correct[i] } Mul(a, a) assert.Equal(correct, a) b := Range(niceprime, 2*niceprime) for i := range correct { correct[i] = a[i] * b[i] } Mul(a, b) assert.Equal(correct, a) /* Weird Corner Cases*/ for i := 1; i < 65; i++ { a = Range(0, i) var testAlign bool addr := &a[0] u := uint(uintptr(unsafe.Pointer(addr))) if u&uint(32) != 0 { testAlign = true } if testAlign { b = Range(i, 2*i) correct = make([]float64, i) for j := range correct { correct[j] = a[j] * b[j] } Mul(a, b) assert.Equal(correct, a) } } } func TestPow(t *testing.T) { a := []float64{0, 1, 2, 3, 4} b := []float64{0, 1, 2, 3, 4} correct := make([]float64, 5) for i := range correct { correct[i] = 
math.Pow(a[i], b[i]) } Pow(a, b) assert.Equal(t, correct, a) } func TestScale(t *testing.T) { a := []float64{0, 1, 2, 3, 4} correct := make([]float64, 5) for i := range correct { correct[i] = a[i] * 5 } Scale(a, 5) assert.Equal(t, correct, a) } func TestScaleInv(t *testing.T) { a := []float64{0, 1, 2, 4, 6} correct := make([]float64, len(a)) for i := range correct { correct[i] = a[i] / 2 } ScaleInv(a, 2) assert.Equal(t, correct, a) } func TestScaleInvR(t *testing.T) { a := []float64{0, 1, 2, 4, 6} correct := make([]float64, len(a)) for i := range correct { correct[i] = 2 / a[i] } ScaleInvR(a, 2) assert.Equal(t, correct, a) } func TestTrans(t *testing.T) { a := []float64{1, 2, 3, 4} correct := make([]float64, 4) for i := range correct { correct[i] = a[i] + float64(1) } Trans(a, 1) assert.Equal(t, correct, a) } func TestTransInv(t *testing.T) { a := []float64{1, 2, 3, 4} correct := make([]float64, 4) for i := range correct { correct[i] = a[i] - float64(1) } TransInv(a, 1) assert.Equal(t, correct, a) } func TestTransInvR(t *testing.T) { a := []float64{1, 2, 3, 4} correct := make([]float64, len(a)) for i := range correct { correct[i] = float64(1) - a[i] } TransInvR(a, 1) assert.Equal(t, correct, a) } func TestPowOf(t *testing.T) { a := []float64{1, 2, 3, 4} correct := make([]float64, len(a)) for i := range correct { correct[i] = math.Pow(a[i], 5) } PowOf(a, 5) assert.Equal(t, correct, a) } func TestPowOfR(t *testing.T) { a := []float64{1, 2, 3, 4} correct := make([]float64, len(a)) for i := range correct { correct[i] = math.Pow(5, a[i]) } PowOfR(a, 5) assert.Equal(t, correct, a) } func TestMax(t *testing.T) { a := []float64{0, 1, 2, 3, 4} b := []float64{5, 4, 2, 2, 1} correct := []float64{5, 4, 2, 3, 4} Max(a, b) assert.Equal(t, correct, a) b = []float64{2} f := func() { Max(a, b) } assert.Panics(t, f) } func TestMin(t *testing.T) { a := []float64{0, 1, 2, 3, 4} b := []float64{5, 4, 2, 2, 1} correct := []float64{0, 1, 2, 2, 1} Min(a, b) assert.Equal(t, correct, a) b = 
[]float64{2} f := func() { Min(a, b) } assert.Panics(t, f) } func TestSum(t *testing.T) { a := []float64{0, 1, 2, 3, 4} correct := float64(10) got := Sum(a) if correct != got { t.Errorf("Expected %f. Got %v instead", correct, got) } } func TestMaxOf(t *testing.T) { a := []float64{0, 1, 2, 1, 0} correct := float64(2) got := MaxOf(a) if got != correct { t.Errorf("Expected %f. Got %v instead", correct, got) } a = []float64{} f := func() { MaxOf(a) } assert.Panics(t, f, "Expected panic when empty slice passed into MaxOf") } func TestMinOf(t *testing.T) { a := []float64{0, 1, 2, 1, 0} correct := float64(0) got := MinOf(a) if got != correct { t.Errorf("Expected %f. Got %v instead", correct, got) } a = []float64{} f := func() { MinOf(a) } assert.Panics(t, f, "Expected panic when empty slice passed into MinOf") } func TestArgmax(t *testing.T) { a := []float64{0, 1, 2, 34, 5} correct := 3 got := Argmax(a) if got != correct { t.Errorf("Expected argmax to be %v. Got %v instead", correct, got) } a = []float64{math.Inf(-1), 2, 3, 4} correct = 3 got = Argmax(a) if got != correct { t.Errorf("Expected argmax to be %v. Got %v instead", correct, got) } a = []float64{math.Inf(1), 2, 3, 4} correct = 0 got = Argmax(a) if got != correct { t.Errorf("Expected argmax to be %v. Got %v instead", correct, got) } a = []float64{1, math.NaN(), 3, 4} correct = 1 got = Argmax(a) if got != correct { t.Errorf("Expected argmax to be %v. Got %v instead", correct, got) } } func TestArgmin(t *testing.T) { a := []float64{0, 1, 2, -34, 5} correct := 3 got := Argmin(a) if got != correct { t.Errorf("Expected argmin to be %v. Got %v instead", correct, got) } a = []float64{math.Inf(-1), 2, 3, 4} correct = 0 got = Argmin(a) if got != correct { t.Errorf("Expected argmin to be %v. Got %v instead", correct, got) } a = []float64{math.Inf(1), 2, 3, 4} correct = 1 got = Argmin(a) if got != correct { t.Errorf("Expected argmin to be %v. 
Got %v instead", correct, got) } a = []float64{1, math.NaN(), 3, 4} correct = 1 got = Argmin(a) if got != correct { t.Errorf("Expected argmin to be %v. Got %v instead", correct, got) } } vecf64-0.9.0/asm.go000066400000000000000000000023171353362416300137350ustar00rootroot00000000000000// +build sse avx package vecf64 // Add performs a̅ + b̅. a̅ will be clobbered func Add(a, b []float64) { if len(a) != len(b) { panic("vectors must be the same length") } addAsm(a, b) } func addAsm(a, b []float64) // Sub performs a̅ - b̅. a̅ will be clobbered func Sub(a, b []float64) { if len(a) != len(b) { panic("vectors must be the same length") } subAsm(a, b) } func subAsm(a, b []float64) // Mul performs a̅ × b̅. a̅ will be clobbered func Mul(a, b []float64) { if len(a) != len(b) { panic("vectors must be the same length") } mulAsm(a, b) } func mulAsm(a, b []float64) // Div performs a̅ ÷ b̅. a̅ will be clobbered func Div(a, b []float64) { if len(a) != len(b) { panic("vectors must be the same length") } divAsm(a, b) } func divAsm(a, b []float64) // Sqrt performs √a̅ elementwise. a̅ will be clobbered func Sqrt(a []float64) // InvSqrt performs 1/√a̅ elementwise. a̅ will be clobbered func InvSqrt(a []float64) /* func Pow(a, b []float64) */ /* func Scale(s float64, a []float64) func ScaleFrom(s float64, a []float64) func Trans(s float64, a []float64) func TransFrom(s float64, a []float64) func Power(s float64, a []float64) func PowerFrom(s float64, a []float64) */ vecf64-0.9.0/asm_test.go000066400000000000000000000115771353362416300150040ustar00rootroot00000000000000// +build sse avx package vecf64 /* IMPORTANT NOTE: Currently Div does not handle division by zero correctly. 
It returns a NaN instead of +Inf */ import ( "math" "testing" "unsafe" "github.com/stretchr/testify/assert" ) // this file is mainly added to facilitate testing of the ASM code, and that it matches up correctly with the expected results func TestDiv(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime-1) correct := Range(0, niceprime-1) for i := range correct { correct[i] = correct[i] / correct[i] } Div(a, a) assert.Equal(correct[1:], a[1:]) assert.Equal(true, math.IsNaN(a[0]), "a[0] is: %v", a[0]) b := Range(niceprime, 2*niceprime-1) for i := range correct { correct[i] = a[i] / b[i] } Div(a, b) assert.Equal(correct[1:], a[1:]) assert.Equal(true, math.IsNaN(a[0]), "a[0] is: %v", a[0]) /* Weird Corner Cases*/ for i := 1; i < 65; i++ { a = Range(0, i) var testAlign bool addr := &a[0] u := uint(uintptr(unsafe.Pointer(addr))) if u&uint(32) != 0 { testAlign = true } if testAlign { b = Range(i, 2*i) correct = make([]float64, i) for j := range correct { correct[j] = a[j] / b[j] } Div(a, b) assert.Equal(correct[1:], a[1:]) } } } func TestSqrt(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime-1) correct := Range(0, niceprime-1) for i, v := range correct { correct[i] = math.Sqrt(v) } Sqrt(a) assert.Equal(correct, a) // negatives a = []float64{-1, -2, -3, -4} Sqrt(a) for _, v := range a { if !math.IsNaN(v) { t.Error("Expected NaN") } } /* Weird Corner Cases*/ for i := 1; i < 65; i++ { a = Range(0, i) var testAlign bool addr := &a[0] u := uint(uintptr(unsafe.Pointer(addr))) if u&uint(32) != 0 { testAlign = true } if testAlign { correct = make([]float64, i) for j := range correct { correct[j] = math.Sqrt(a[j]) } Sqrt(a) assert.Equal(correct, a) } } } func TestInvSqrt(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime-1) correct := Range(0, niceprime-1) for i, v := range correct { correct[i] = 1.0 / math.Sqrt(v) } InvSqrt(a) assert.Equal(correct[1:], a[1:]) if !math.IsInf(a[0], 0) { t.Error("1/0 should be +Inf or -Inf") } // Weird Corner 
Cases for i := 1; i < 65; i++ { a = Range(0, i) var testAlign bool addr := &a[0] u := uint(uintptr(unsafe.Pointer(addr))) if u&uint(32) != 0 { testAlign = true } if testAlign { correct = make([]float64, i) for j := range correct { correct[j] = 1.0 / math.Sqrt(a[j]) } InvSqrt(a) assert.Equal(correct[1:], a[1:], "i = %d, %v", i, Range(0, i)) if !math.IsInf(a[0], 0) { t.Error("1/0 should be +Inf or -Inf") } } } } /* BENCHMARKS */ func _vanillaVecAdd(a, b []float64) { for i := range a { a[i] += b[i] } } func BenchmarkVecAdd(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { Add(x, y) } } func BenchmarkVanillaVecAdd(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { _vanillaVecAdd(x, y) } } func _vanillaVecSub(a, b []float64) { for i := range a { a[i] -= b[i] } } func BenchmarkVecSub(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { Sub(x, y) } } func BenchmarkVanillaVecSub(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { _vanillaVecSub(x, y) } } func _vanillaVecMul(a, b []float64) { for i := range a { a[i] *= b[i] } } func BenchmarkVecMul(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { Mul(x, y) } } func BenchmarkVanillaVecMul(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { _vanillaVecMul(x, y) } } func _vanillaVecDiv(a, b []float64) { for i := range a { a[i] /= b[i] } } func BenchmarkVecDiv(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { Div(x, y) } } func BenchmarkVanillaVecDiv(b *testing.B) { x := Range(0, niceprime) y := Range(niceprime, 2*niceprime) for n := 0; n < b.N; n++ { _vanillaVecDiv(x, y) } } func _vanillaVecSqrt(a []float64) { for i, v := range a { a[i] = math.Sqrt(v) } } func 
BenchmarkVecSqrt(b *testing.B) { x := Range(0, niceprime) for n := 0; n < b.N; n++ { Sqrt(x) } } func BenchmarkVanillaVecSqrt(b *testing.B) { x := Range(0, niceprime) for n := 0; n < b.N; n++ { _vanillaVecSqrt(x) } } func _vanillaVecInverseSqrt(a []float64) { for i, v := range a { a[i] = 1.0 / math.Sqrt(v) } } func BenchmarkVecInvSqrt(b *testing.B) { x := Range(0, niceprime) for n := 0; n < b.N; n++ { InvSqrt(x) } } func BenchmarkVanillaVecInvSqrt(b *testing.B) { x := Range(0, niceprime) for n := 0; n < b.N; n++ { _vanillaVecInverseSqrt(x) } } vecf64-0.9.0/asm_vecAdd_avx.s000066400000000000000000000065771353362416300157320ustar00rootroot00000000000000// +build avx // +build amd64 /* This function adds two []float64 with some SIMD optimizations using AVX. Instead of doing this: for i := 0; i < len(a); i++ { a[i] += b[i] } Here, I use the term "pairs" to denote an element of `a` and and element of `b` that will be added together. a[i], b[i] is a pair. Using AVX, we can simultaneously add 16 pairs at the same time, which will look something like this: for i := 0; i < len(a); i+=4{ a[i:i+4] += b[i:i+4] // this code won't run. } AVX registers are 256 bits, meaning we can put 4 float64s in there. These are the registers I use to store the relevant information: SI - Used to store the top element of slice A (index 0). This register is incremented every loop DI - used to store the top element of slice B. Incremented every loop AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented. Y0, Y1 - YMM registers. X0, X1 - XMM registers. 
This pseudocode best explains the rather simple assembly: lenA := len(a) i := 0 loop: for { a[i:i+4*8] += b[i:i+4*8] lenA -= 4 i += 4 * 8 // 4 elements, 8 bytes each if lenA < 0{ break } } remainder4head: lenA += 4 if lenA == 0 { return } remainder2: for { a[i:i+2*8] += b[i:i+2*8] lenA -=2 i += 2 * 8 // 2 elements, 8 bytes each if lenA < 0{ break } } remainder1head: lenA += 2 if lenA == 0 { return } remainder1: for { a[i] += b[i] i+=8 // each element is 8 bytes lenA-- } return */ #include "textflag.h" // func addAsm(a, b []float64) TEXT ·addAsm(SB), NOSPLIT, $0 MOVQ a_data+0(FP), SI MOVQ b_data+24(FP), DI // use detination index register for this MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap // each ymm register can take up to 4 float64s. SUBQ $4, AX JL remainder loop: // a[0] to a[3] // VMOVUPD 0(%rsi), %ymm0 // VMOVUPD 0(%rdi), %ymm1 // VADDPD %ymm0, %ymm1, %ymm0 // VMOVUPD %ymm0, 0(%rsi) BYTE $0xc5; BYTE $0xfd; BYTE $0x10; BYTE $0x06 // vmovupd (%rsi),%ymm0 BYTE $0xc5; BYTE $0xfd; BYTE $0x10; BYTE $0x0f // vmovupd (%rdi),%ymm1 BYTE $0xc5; BYTE $0xf5; BYTE $0x58; BYTE $0xc0 // vaddpd %ymm0,%ymm1,%ymm0 BYTE $0xc5; BYTE $0xfd; BYTE $0x11; BYTE $0x06 // vmovupd %ymm0,(%rsi) ADDQ $32, SI ADDQ $32, DI SUBQ $4, AX JGE loop remainder: ADDQ $4, AX JE done SUBQ $2, AX JL remainder1head remainder2: // VMOVUPD (SI), X0 // VMOVUPD (DI), X1 // VADDPD X0, X1, X0 // VMOVUPD X0, (SI) BYTE $0xc5; BYTE $0xf9; BYTE $0x10; BYTE $0x06 // vmovupd (%rsi),%xmm0 BYTE $0xc5; BYTE $0xf9; BYTE $0x10; BYTE $0x0f // vmovupd (%rdi),%xmm1 BYTE $0xc5; BYTE $0xf1; BYTE $0x58; BYTE $0xc0 // vaddpd %xmm0,%xmm1,%xmm0 BYTE $0xc5; BYTE $0xf9; BYTE $0x11; BYTE $0x06 // vmovupd %xmm0,(%rsi) ADDQ $16, SI ADDQ $16, DI SUBQ $2, AX JGE remainder2 remainder1head: ADDQ $2, AX JE done remainder1: // copy into the appropriate registers // VMOVSD (SI), X0 // VMOVSD (DI), X1 // VADDSD X0, X1, X0 // VMOVSD X0, (SI) BYTE $0xc5; BYTE $0xfb; BYTE $0x10; 
BYTE $0x06 // vmovsd (%rsi), %xmm0 BYTE $0xc5; BYTE $0xfb; BYTE $0x10; BYTE $0x0f // vmovsd (%rdi), %xmm1 BYTE $0xc5; BYTE $0xf3; BYTE $0x58; BYTE $0xc0 // vaddsd %xmm0,%xmm1,%xmm0 BYTE $0xc5; BYTE $0xfb; BYTE $0x11; BYTE $0x06 // vmovsd %xmm0,(%rsi) // update pointer to the top of the data ADDQ $8, SI ADDQ $8, DI DECQ AX JNE remainder1 done: RET vecf64-0.9.0/asm_vecAdd_sse.s000066400000000000000000000017611353362416300157140ustar00rootroot00000000000000// +build sse // +build amd64 #include "textflag.h" // func addAsm(a, b []float64) TEXT ·addAsm(SB), NOSPLIT, $0 MOVQ a_data+0(FP), SI MOVQ b_data+24(FP), DI // use destination index register for this MOVQ a_len+8(FP), AX // len(a) into AX // check if there are at least 8 elements SUBQ $8, AX JL remainder loop: // a[0] MOVAPD (SI), X0 MOVAPD (DI), X1 ADDPD X0, X1 MOVAPD X1, (SI) MOVAPD 16(SI), X2 MOVAPD 16(DI), X3 ADDPD X2, X3 MOVAPD X3, 16(SI) MOVAPD 32(SI), X4 MOVAPD 32(DI), X5 ADDPD X4, X5 MOVAPD X5, 32(SI) MOVAPD 48(SI), X6 MOVAPD 48(DI), X7 ADDPD X6, X7 MOVAPD X7, 48(SI) // update pointers. 4 registers, 2 elements each, 8 bytes per element ADDQ $64, SI ADDQ $64, DI // len(a) is now 4*2 elements less SUBQ $8, AX JGE loop remainder: ADDQ $8, AX JE done remainderloop: MOVSD (SI), X0 MOVSD (DI), X1 ADDSD X0, X1 MOVSD X1, (SI) // update pointer to the top of the data ADDQ $8, SI ADDQ $8, DI DECQ AX JNE remainderloop done: RET vecf64-0.9.0/asm_vecDiv_avx.s000066400000000000000000000077071353362416300157600ustar00rootroot00000000000000// +build avx // +build amd64 /* This function adds two []float64 with some SIMD optimizations using AVX. Instead of doing this: for i := 0; i < len(a); i++ { a[i] /= b[i] } Here, I use the term "pairs" to denote an element of `a` and and element of `b` that will be added together. a[i], b[i] is a pair. Using AVX, we can simultaneously add 4 pairs at the same time, which will look something like this: for i := 0; i < len(a); i+=4{ a[i:i+4] /= b[i:i+4] // this code won't run. 
}

AVX registers are 256 bits, meaning we can put 4 float64s in there.

These are the registers I use to store the relevant information:
	SI - Used to store the top element of slice A (index 0). This register is incremented every loop
	DI - used to store the top element of slice B. Incremented every loop
	AX - len(a) is stored in here. AX is also used as the "working" count of the length that is decremented.
	Y0, Y1 - YMM registers.
	X0, X1 - XMM registers.

With regards to VDIVPD and VDIVSD, it turns out that the description of these instructions are:
	VDIVPD ymm1, ymm2, ymm3: Divide packed double-precision floating-point values in ymm2 by those in ymm3/mem and stores result in ymm1.[0]

The description is written with intel's syntax (in this form: Dest, Src1, Src2).
When converting to Go's ASM it becomes: (Src2, Src1, Dest)

This pseudocode best explains the rather simple assembly:

	lenA := len(a)
	i := 0

	loop:
	for {
		a[i:i+4*8] /= b[i:i+4*8]
		lenA -= 4
		i += 4 * 8 // 4 elements, 8 bytes each
		if lenA < 0{
			break
		}
	}

	remainder:
	lenA += 4
	if lenA == 0 {
		return
	}

	remainder2:
	for {
		a[i:i+2*8] /= b[i:i+2*8]
		lenA -=2
		i += 2 * 8 // 2 elements, 8 bytes each
		if lenA < 0{
			break
		}
	}

	remainder1head:
	lenA += 2
	if lenA == 0 {
		return
	}

	remainder1:
	for {
		a[i] /= b[i]
		i+=8 // each element is 8 bytes
		lenA--
		if lenA == 0 {
			break
		}
	}

	return

Citation
========
[0]http://www.felixcloutier.com/x86/DIVPD.html
*/
#include "textflag.h"

// func divAsm(a, b []float64)
//
// divAsm computes a[i] /= b[i] in place for every i in [0, len(a)).
// Only len(a) is read; the length of b is never loaded, so the caller
// presumably guarantees len(b) >= len(a) - TODO confirm against the Go wrapper.
TEXT ·divAsm(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ b_data+24(FP), DI // use destination index register for this
	MOVQ a_len+8(FP), AX   // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap

	// AX is kept biased by -4 while in the main loop, so that the sign flag
	// tells us whether a full group of 4 elements is still available.
	SUBQ $4, AX
	JL   remainder

	// each ymm register can take up to 4 float64s, so one iteration handles 4 pairs.
	// TODO: unroll over more ymm registers to process more pairs per iteration.

loop:
	// a[0] to a[3]
	// VMOVUPD (SI), Y0
	// VMOVUPD (DI), Y1
	// VDIVPD Y1, Y0, Y0
	// VMOVUPD Y0, (SI)
	BYTE $0xc5; BYTE $0xfd; BYTE $0x10; BYTE $0x06 // vmovupd (%rsi),%ymm0
	BYTE $0xc5; BYTE $0xfd; BYTE $0x10; BYTE $0x0f // vmovupd (%rdi),%ymm1
	BYTE $0xc5; BYTE $0xfd; BYTE $0x5e; BYTE $0xc1 // vdivpd %ymm1,%ymm0,%ymm0
	BYTE $0xc5; BYTE $0xfd; BYTE $0x11; BYTE $0x06 // vmovupd %ymm0,(%rsi)

	ADDQ $32, SI
	ADDQ $32, DI
	SUBQ $4, AX
	JGE  loop

remainder:
	// undo the -4 bias; AX is now the number of elements left (0 to 3)
	ADDQ $4, AX
	JE   done

	SUBQ $2, AX
	JL   remainder1head

remainder2:
	// VMOVUPD (SI), X0
	// VMOVUPD (DI), X1
	// VDIVPD X1, X0, X0
	// VMOVUPD X0, (SI)
	BYTE $0xc5; BYTE $0xf9; BYTE $0x10; BYTE $0x06 // vmovupd (%rsi),%xmm0
	BYTE $0xc5; BYTE $0xf9; BYTE $0x10; BYTE $0x0f // vmovupd (%rdi),%xmm1
	BYTE $0xc5; BYTE $0xf9; BYTE $0x5e; BYTE $0xc1 // vdivpd %xmm1,%xmm0,%xmm0
	BYTE $0xc5; BYTE $0xf9; BYTE $0x11; BYTE $0x06 // vmovupd %xmm0,(%rsi)

	ADDQ $16, SI
	ADDQ $16, DI
	SUBQ $2, AX
	JGE  remainder2

remainder1head:
	// undo the -2 bias; AX is now 0 or 1
	ADDQ $2, AX
	JE   done

remainder1:
	// VMOVSD (SI), X0
	// VMOVSD (DI), X1
	// VDIVSD X1, X0, X0
	// VMOVSD X0, (SI)
	BYTE $0xc5; BYTE $0xfb; BYTE $0x10; BYTE $0x06 // vmovsd (%rsi),%xmm0
	BYTE $0xc5; BYTE $0xfb; BYTE $0x10; BYTE $0x0f // vmovsd (%rdi),%xmm1
	BYTE $0xc5; BYTE $0xfb; BYTE $0x5e; BYTE $0xc1 // vdivsd %xmm1,%xmm0,%xmm0
	BYTE $0xc5; BYTE $0xfb; BYTE $0x11; BYTE $0x06 // vmovsd %xmm0,(%rsi)

	// update pointer to the top of the data
	ADDQ $8, SI
	ADDQ $8, DI
	DECQ AX
	JNE  remainder1

done:
	RET
vecf64-0.9.0/asm_vecDiv_sse.s000066400000000000000000000020541353362416300157420ustar00rootroot00000000000000// +build sse
// +build amd64

#include "textflag.h"

// func divAsm(a, b []float64)
//
// divAsm computes a[i] /= b[i] in place for every i in [0, len(a)), using SSE2.
// Only len(a) is read; the length of b is never loaded, so the caller
// presumably guarantees len(b) >= len(a) - TODO confirm against the Go wrapper.
TEXT ·divAsm(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ b_data+24(FP), DI // use destination index register for this
	MOVQ a_len+8(FP), AX   // len(a) into AX

	// check if there are at least 8 elements
	SUBQ $8, AX
	JL   remainder

loop:
	// a[0] to a[7], two elements per XMM register.
	// MOVUPD rather than MOVAPD: MOVAPD faults when the address is not 16-byte
	// aligned, and slice data is only known to be 8-byte aligned (e.g. a[1:]).
	MOVUPD (SI), X0
	MOVUPD (DI), X1
	DIVPD  X1, X0
	MOVUPD X0, (SI)

	MOVUPD 16(SI), X2
	MOVUPD 16(DI), X3
	DIVPD  X3, X2
	MOVUPD X2, 16(SI)

	MOVUPD 32(SI), X4
	MOVUPD 32(DI), X5
	DIVPD  X5, X4
	MOVUPD X4, 32(SI)

	MOVUPD 48(SI), X6
	MOVUPD 48(DI), X7
	DIVPD  X7, X6
	MOVUPD X6, 48(SI)

	// update pointers. 4 registers, 2 elements each, 8 bytes per element
	ADDQ $64, SI
	ADDQ $64, DI

	// len(a) is now 4*2 elements less
	SUBQ $8, AX
	JGE  loop

remainder:
	// undo the -8 bias; AX is now the number of elements left (0 to 7)
	ADDQ $8, AX
	JE   done

remainderloop:
	// copy into the appropriate registers
	MOVSD (SI), X0
	MOVSD (DI), X1
	DIVSD X1, X0

	// save it back
	MOVSD X0, (SI)

	// update pointer to the top of the data
	ADDQ $8, SI
	ADDQ $8, DI

	DECQ AX
	JNE  remainderloop

done:
	RET
vecf64-0.9.0/asm_vecExp_avx.s000066400000000000000000000000671353362416300157620ustar00rootroot00000000000000// +build avx, sse
// +build amd64
// +build !fastmath

vecf64-0.9.0/asm_vecInvSqrt_avx.s000066400000000000000000000037671353362416300166460ustar00rootroot00000000000000// +build avx
// +build amd64
// +build !fastmath

/*
InvSqrt is a function that inverse square roots (1/√x) each element in a []float64

Because of the way VBROADCASTSD works, we first backup the first element of the slice into a register, BX. Meanwhile, we replace the first element with a constant 1.0.
This is done so that we can broadcast the constant into the Y1 register. After 1.0 has been broadcasted into Y1, we move the value back into the top of the slice.
The following is then performed:
	Y0 = Sqrt(a[i:i+4])
	Y0 = Y1/Y0
And the standard looping thing happens
*/
#include "textflag.h"

// bit pattern of the float64 constant 1.0
#define one 0x3ff0000000000000

// func InvSqrt(a []float64)
//
// InvSqrt replaces every a[i] with 1/sqrt(a[i]), in place.
// Groups of 4 elements are handled with 256-bit AVX instructions (emitted as raw bytes);
// whatever is left over (0 to 3 elements) is handled one element at a time with scalar SSE2.
TEXT ·InvSqrt(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ SI, CX           // copy of the base pointer; CX is not read again in this function
	MOVQ a_len+8(FP), AX  // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap

	// make sure that len(a) >= 1
	// (BX = 0; the jump is taken when 0 >= len(a))
	XORQ BX, BX
	CMPQ BX, AX
	JGE  done

	// DX holds the bits of 1.0 for the rest of the function
	MOVQ $one, DX

	// AX is kept biased by -4 while in the main loop
	SUBQ $4, AX
	JL   remainder

	// store the first element in BX
	// This is done so that we can move 1.0 into the first element of the slice
	// because AVX instruction vbroadcastsd can only read from memory location not from registers
	MOVQ (SI), BX

	// load 1.0 into the first element
	MOVQ DX, (SI)

	// VBROADCASTSD (SI), Y1
	BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x19; BYTE $0x0e // vbroadcastsd (%rsi),%ymm1

	// now that we're done with the ghastly business of trying to broadcast 1.0 without using any extra memory...
	// we restore the first element
	MOVQ BX, (SI)

loop:
	// a[0] to a[3]
	// VSQRTPD (SI), Y0
	// VDIVPD Y0, Y1, Y0
	// VMOVUPD Y0, (SI)
	BYTE $0xc5; BYTE $0xfd; BYTE $0x51; BYTE $0x06 // vsqrtpd (%rsi),%ymm0
	BYTE $0xc5; BYTE $0xf5; BYTE $0x5e; BYTE $0xc0 // vdivpd %ymm0, %ymm1, %ymm0
	BYTE $0xc5; BYTE $0xfd; BYTE $0x11; BYTE $0x06 // vmovupd %ymm0,(%rsi)

	// 4 elements processed, 8 bytes each
	ADDQ $32, SI
	SUBQ $4, AX
	JGE  loop

remainder:
	// undo the -4 bias; AX is now the number of elements left (0 to 3)
	ADDQ $4, AX
	JE   done

remainder1:
	// X1 = 1.0 (reloaded every iteration because DIVSD overwrites it)
	MOVQ   DX, X1
	MOVSD  (SI), X0
	SQRTSD X0, X0
	DIVSD  X0, X1 // X1 = 1.0 / sqrt(a[i])
	MOVSD  X1, (SI)

	ADDQ $8, SI
	DECQ AX
	JNE  remainder1

done:
	RET
vecf64-0.9.0/asm_vecInvSqrt_sse.s000066400000000000000000000024321353362416300166260ustar00rootroot00000000000000// +build sse
// +build amd64
// +build !fastmath

/*
InvSqrt is a function that inverse square roots (1/√x) each element in a []float64

The SSE version uses SHUFPD to "broadcast" the 1.0 constant to the X1 register.
The rest proceeds as expected
*/
#include "textflag.h"

// bit pattern of the float64 constant 1.0
#define one 0x3ff0000000000000

// func InvSqrt(a []float64)
//
// InvSqrt replaces every a[i] with 1/sqrt(a[i]), in place.
// Pairs of elements are handled with packed SSE2 instructions; a final odd
// element, if any, is handled with the scalar forms.
TEXT ·InvSqrt(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ a_len+8(FP), AX // len(a) into AX

	// make sure that len(a) >= 1
	// (BX = 0; the jump is taken when 0 >= len(a))
	XORQ BX, BX
	CMPQ BX, AX
	JGE  done

	// DX holds the bits of 1.0 for the rest of the function
	MOVQ $one, DX

	// AX is kept biased by -2 while in the main loop
	SUBQ $2, AX
	JL   remainder

	// broadcast 1.0 to all elements of X1, straight from DX so that the
	// slice itself is never written with anything but final results.
	// 0x00 shuffles the least significant bits of the X1 reg, which means the first element is repeated
	MOVQ   DX, X1
	SHUFPD $0x00, X1, X1
	MOVAPD X1, X2 // backup, because X1 will get clobbered in DIVPD

loop:
	MOVAPD X2, X1 // X1 = {1.0, 1.0}

	// Load with MOVUPD first: SQRTPD with a memory operand requires a 16-byte
	// aligned address, and slice data is only known to be 8-byte aligned.
	MOVUPD (SI), X0
	SQRTPD X0, X0
	DIVPD  X0, X1 // X1 = 1.0 / sqrt(a[i:i+2])
	MOVUPD X1, (SI)

	// we processed 2 elements. Each element is 8 bytes. So jump 16 ahead
	ADDQ $16, SI

	SUBQ $2, AX
	JGE  loop

remainder:
	// undo the -2 bias; AX is now 0 or 1
	ADDQ $2, AX
	JE   done

remainder1:
	// X1 = 1.0 (reloaded every iteration because DIVSD overwrites it)
	MOVQ   DX, X1
	MOVSD  (SI), X0
	SQRTSD X0, X0
	DIVSD  X0, X1
	MOVSD  X1, (SI)

	ADDQ $8, SI
	DECQ AX
	JNE  remainder1

done:
	RET
vecf64-0.9.0/asm_vecMul_avx.s000066400000000000000000000064351353362416300157700ustar00rootroot00000000000000// +build avx
// +build amd64

/*
Mul multiplies two []float64 with some SIMD optimizations using AVX.

Instead of doing this:
	for i := 0; i < len(a); i++ {
		a[i] *= b[i]
	}

Here, I use the term "pairs" to denote an element of `a` and an element of `b` that will be multiplied together.
a[i], b[i] is a pair.

Using AVX, we can multiply 4 pairs at the same time, which will look something like this:
	for i := 0; i < len(a); i+=4{
		a[i:i+4] *= b[i:i+4] // this code won't run.
	}

AVX registers are 256 bits, meaning we can put 4 float64s in there.

These are the registers I use to store the relevant information:
	SI - Used to store the top element of slice A (index 0). This register is incremented every loop
	DI - used to store the top element of slice B. Incremented every loop
	AX - len(a) is stored in here.
AX is also used as the "working" count of the length that is decremented.
	Y0, Y1 - YMM registers.
	X0, X1 - XMM registers.

This pseudocode best explains the rather simple assembly:

	lenA := len(a)
	i := 0

	loop:
	for {
		a[i:i+4*8] *= b[i:i+4*8]
		lenA -= 4
		i += 4 * 8 // 4 elements, 8 bytes each
		if lenA < 0{
			break
		}
	}

	remainder:
	lenA += 4
	if lenA == 0 {
		return
	}

	remainder2:
	for {
		a[i:i+2*8] *= b[i:i+2*8]
		lenA -=2
		i += 2 * 8 // 2 elements, 8 bytes each
		if lenA < 0{
			break
		}
	}

	remainder1head:
	lenA += 2
	if lenA == 0 {
		return
	}

	remainder1:
	for {
		a[i] *= b[i]
		i+=8 // each element is 8 bytes
		lenA--
		if lenA == 0 {
			break
		}
	}

	return
*/
#include "textflag.h"

// func mulAsm(a, b []float64)
//
// mulAsm computes a[i] *= b[i] in place for every i in [0, len(a)).
// Only len(a) is read; the length of b is never loaded, so the caller
// presumably guarantees len(b) >= len(a) - TODO confirm against the Go wrapper.
TEXT ·mulAsm(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ b_data+24(FP), DI // use destination index register for this
	MOVQ a_len+8(FP), AX   // len(a) into AX

	// each ymm register can take up to 4 float64s.
	// AX is kept biased by -4 while in the main loop.
	SUBQ $4, AX
	JL   remainder

loop:
	// a[0] to a[3]
	// VMOVUPD 0(SI), Y0
	// VMOVUPD 0(DI), Y1
	// VMULPD Y0, Y1, Y0
	// VMOVUPD Y0, 0(SI)
	BYTE $0xc5; BYTE $0xfd; BYTE $0x10; BYTE $0x06 // vmovupd (%rsi),%ymm0
	BYTE $0xc5; BYTE $0xfd; BYTE $0x10; BYTE $0x0f // vmovupd (%rdi),%ymm1
	BYTE $0xc5; BYTE $0xf5; BYTE $0x59; BYTE $0xc0 // vmulpd %ymm0,%ymm1,%ymm0
	BYTE $0xc5; BYTE $0xfd; BYTE $0x11; BYTE $0x06 // vmovupd %ymm0,(%rsi)

	ADDQ $32, SI
	ADDQ $32, DI
	SUBQ $4, AX
	JGE  loop

remainder:
	// undo the -4 bias; AX is now the number of elements left (0 to 3)
	ADDQ $4, AX
	JE   done

	SUBQ $2, AX
	JL   remainder1head

remainder2:
	// VMOVUPD (SI), X0
	// VMOVUPD (DI), X1
	// VMULPD X0, X1, X0
	// VMOVUPD X0, (SI)
	BYTE $0xc5; BYTE $0xf9; BYTE $0x10; BYTE $0x06 // vmovupd (%rsi),%xmm0
	BYTE $0xc5; BYTE $0xf9; BYTE $0x10; BYTE $0x0f // vmovupd (%rdi),%xmm1
	BYTE $0xc5; BYTE $0xf1; BYTE $0x59; BYTE $0xc0 // vmulpd %xmm0,%xmm1,%xmm0
	BYTE $0xc5; BYTE $0xf9; BYTE $0x11; BYTE $0x06 // vmovupd %xmm0,(%rsi)

	ADDQ $16, SI
	ADDQ $16, DI
	SUBQ $2, AX
	JGE  remainder2

remainder1head:
	// undo the -2 bias; AX is now 0 or 1
	ADDQ $2, AX
	JE   done

remainder1:
	// copy into the appropriate registers
	// VMOVSD (SI), X0
	// VMOVSD (DI), X1
	// VMULSD X0, X1, X0
	// VMOVSD X0, (SI)
	BYTE $0xc5; BYTE $0xfb; BYTE $0x10; BYTE $0x06 // vmovsd (%rsi),%xmm0
	BYTE $0xc5; BYTE $0xfb; BYTE $0x10; BYTE $0x0f // vmovsd (%rdi),%xmm1
	BYTE $0xc5; BYTE $0xf3; BYTE $0x59; BYTE $0xc0 // vmulsd %xmm0,%xmm1,%xmm0
	BYTE $0xc5; BYTE $0xfb; BYTE $0x11; BYTE $0x06 // vmovsd %xmm0,(%rsi)

	// update pointer to the top of the data
	ADDQ $8, SI
	ADDQ $8, DI
	DECQ AX
	JNE  remainder1

done:
	RET
vecf64-0.9.0/asm_vecMul_sse.s000066400000000000000000000017671353362416300157670ustar00rootroot00000000000000// +build sse
// +build amd64

#include "textflag.h"

// func mulAsm(a, b []float64)
//
// mulAsm computes a[i] *= b[i] in place for every i in [0, len(a)), using SSE2.
// Only len(a) is read; the length of b is never loaded, so the caller
// presumably guarantees len(b) >= len(a) - TODO confirm against the Go wrapper.
TEXT ·mulAsm(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ b_data+24(FP), DI // use destination index register for this
	MOVQ a_len+8(FP), AX   // len(a) into AX

	// check if there are at least 8 elements
	SUBQ $8, AX
	JL   remainder

loop:
	// a[0] to a[7], two elements per XMM register.
	// MOVUPD rather than MOVAPD: MOVAPD faults when the address is not 16-byte
	// aligned, and slice data is only known to be 8-byte aligned (e.g. a[1:]).
	MOVUPD (SI), X0
	MOVUPD (DI), X1
	MULPD  X0, X1
	MOVUPD X1, (SI)

	MOVUPD 16(SI), X2
	MOVUPD 16(DI), X3
	MULPD  X2, X3
	MOVUPD X3, 16(SI)

	MOVUPD 32(SI), X4
	MOVUPD 32(DI), X5
	MULPD  X4, X5
	MOVUPD X5, 32(SI)

	MOVUPD 48(SI), X6
	MOVUPD 48(DI), X7
	MULPD  X6, X7
	MOVUPD X7, 48(SI)

	// update pointers. 4 registers, 2 elements at once, each element is 8 bytes
	ADDQ $64, SI
	ADDQ $64, DI

	// len(a) is now 4*2 elements less
	SUBQ $8, AX
	JGE  loop

remainder:
	// undo the -8 bias; AX is now the number of elements left (0 to 7)
	ADDQ $8, AX
	JE   done

remainderloop:
	MOVSD (SI), X0
	MOVSD (DI), X1
	MULSD X0, X1
	MOVSD X1, (SI)

	// update pointer to the top of the data
	ADDQ $8, SI
	ADDQ $8, DI

	DECQ AX
	JNE  remainderloop

done:
	RET
vecf64-0.9.0/asm_vecSqrt_avx.s000066400000000000000000000021121353362416300161500ustar00rootroot00000000000000// +build avx
// +build amd64
// +build !fastmath

/*
Sqrt takes a []float64 and square roots every element in the slice.
*/
#include "textflag.h"

// func Sqrt(a []float64)
//
// Sqrt replaces every a[i] with sqrt(a[i]), in place.
// Groups of 4 and then 2 elements are handled with AVX instructions (emitted
// as raw bytes); a final odd element is handled with scalar SSE2.
TEXT ·Sqrt(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap

	// AX is kept biased by -4 while in the main loop
	SUBQ $4, AX
	JL   remainder

loop:
	// a[0] to a[3]
	// VSQRTPD (SI), Y0
	// VMOVUPD Y0, (SI)
	BYTE $0xc5; BYTE $0xfd; BYTE $0x51; BYTE $0x06 // vsqrtpd (%rsi),%ymm0
	BYTE $0xc5; BYTE $0xfd; BYTE $0x11; BYTE $0x06 // vmovupd %ymm0,(%rsi)

	ADDQ $32, SI
	SUBQ $4, AX
	JGE  loop

remainder:
	// undo the -4 bias; AX is now the number of elements left (0 to 3)
	ADDQ $4, AX
	JE   done

	SUBQ $2, AX
	JL   remainder1head

remainder2:
	// VSQRTPD (SI), X0
	// VMOVUPD X0, (SI)
	BYTE $0xc5; BYTE $0xf9; BYTE $0x51; BYTE $0x06 // vsqrtpd (%rsi),%xmm0
	BYTE $0xc5; BYTE $0xf9; BYTE $0x11; BYTE $0x06 // vmovupd %xmm0,(%rsi)

	ADDQ $16, SI
	SUBQ $2, AX
	JGE  remainder2

remainder1head:
	// undo the -2 bias; AX is now 0 or 1
	ADDQ $2, AX
	JE   done

remainder1:
	MOVSD  (SI), X0
	SQRTSD X0, X0
	MOVSD  X0, (SI)

	ADDQ $8, SI
	DECQ AX
	JNE  remainder1

done:
	RET
vecf64-0.9.0/asm_vecSqrt_sse.s000066400000000000000000000012511353362416300161470ustar00rootroot00000000000000// +build sse
// +build amd64
// +build !fastmath

/*
Sqrt takes a []float64 and square roots every element in the slice.
*/
#include "textflag.h"

// func Sqrt(a []float64)
//
// Sqrt replaces every a[i] with sqrt(a[i]), in place.
// Pairs of elements are handled with packed SSE2 instructions; a final odd
// element, if any, is handled with the scalar forms.
TEXT ·Sqrt(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ a_len+8(FP), AX // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap

	// AX is kept biased by -2 while in the main loop
	SUBQ $2, AX
	JL   remainder

loop:
	// Load with MOVUPD first: SQRTPD with a memory operand requires a 16-byte
	// aligned address, and slice data is only known to be 8-byte aligned.
	MOVUPD (SI), X0
	SQRTPD X0, X0
	MOVUPD X0, (SI)

	// we processed 2 elements. Each element is 8 bytes. So jump 16 ahead
	ADDQ $16, SI

	SUBQ $2, AX
	JGE  loop

remainder:
	// undo the -2 bias; AX is now 0 or 1
	ADDQ $2, AX
	JE   done

remainder1:
	MOVSD  (SI), X0
	SQRTSD X0, X0
	MOVSD  X0, (SI)

	ADDQ $8, SI
	DECQ AX
	JNE  remainder1

done:
	RET
vecf64-0.9.0/asm_vecSub_avx.s000066400000000000000000000076211353362416300157620ustar00rootroot00000000000000// +build avx
// +build amd64

/*
Sub subtracts two []float64 with some SIMD optimizations using AVX.
Instead of doing this: for i := 0; i < len(a); i++ { a[i] -= b[i] } Here, I use the term "pairs" to denote an element of `a` and an element of `b` that will be subtracted from one another. a[i], b[i] is a pair. Using AVX, we can subtract 4 pairs at the same time, which will look something like this: for i := 0; i < len(a); i+=4{ a[i:i+4] -= b[i:i+4] // this code won't run. } These are the registers I use to store the relevant information: SI - Used to store the top element of slice A (index 0). This register is incremented every loop DI - used to store the top element of slice B. Incremented every loop AX - len(a) is stored in here. Volatile register. AX is also used as the "working" count of the length that is decremented. Y0, Y1 - YMM registers. X0, X1 - XMM registers. With regards to VSUBPD and VSUBSD, it turns out that the description of these instructions are: VSUBPD ymm1, ymm2, ymm3: Subtract packed double-precision floating-point values in ymm3/mem from ymm2 and stores result in ymm1.[0] The description is written with intel's syntax (in this form: Dest, Src1, Src2). 
When converting to Go's ASM it becomes: (Src2, Src1, Dest)

This pseudocode best explains the rather simple assembly:

	lenA := len(a)
	i := 0

	loop:
	for {
		a[i:i+4*8] -= b[i:i+4*8]
		lenA -= 4
		i += 4*8 // 4 elements, 8 bytes each
		if lenA < 0{
			break
		}
	}

	remainder:
	lenA += 4
	if lenA == 0 {
		return
	}

	remainder2:
	for {
		a[i:i+2*8] -= b[i:i+2*8]
		lenA -=2
		i += 2 * 8 // 2 elements, 8 bytes each
		if lenA < 0{
			break
		}
	}

	remainder1head:
	lenA += 2
	if lenA == 0 {
		return
	}

	remainder1:
	for {
		a[i] -= b[i]
		i+=8 // each element is 8 bytes
		lenA--
		if lenA == 0 {
			break
		}
	}

	return

Citation
========
[0]http://www.felixcloutier.com/x86/SUBPD.html
*/
#include "textflag.h"

// func subAsm(a, b []float64)
//
// subAsm computes a[i] -= b[i] in place for every i in [0, len(a)).
// Only len(a) is read; the length of b is never loaded, so the caller
// presumably guarantees len(b) >= len(a) - TODO confirm against the Go wrapper.
TEXT ·subAsm(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ b_data+24(FP), DI // use destination index register for this
	MOVQ a_len+8(FP), AX   // len(a) into AX; AX doubles as the working count

	// each ymm register can take up to 4 float64s.
	// AX is kept biased by -4 while in the main loop.
	SUBQ $4, AX
	JL   remainder

loop:
	// a[0] to a[3]
	// VMOVUPD (SI), Y0
	// VMOVUPD (DI), Y1
	// VSUBPD Y1, Y0, Y0
	// VMOVUPD Y0, (SI)
	BYTE $0xc5; BYTE $0xfd; BYTE $0x10; BYTE $0x06 // vmovupd 0(%rsi),%ymm0
	BYTE $0xc5; BYTE $0xfd; BYTE $0x10; BYTE $0x0f // vmovupd 0(%rdi),%ymm1
	BYTE $0xc5; BYTE $0xfd; BYTE $0x5c; BYTE $0xc1 // vsubpd %ymm1,%ymm0,%ymm0
	BYTE $0xc5; BYTE $0xfd; BYTE $0x11; BYTE $0x06 // vmovupd %ymm0, 0(%rsi)

	// 4 elements processed. Each element is 8 bytes. So jump 32 bytes
	ADDQ $32, SI
	ADDQ $32, DI

	SUBQ $4, AX
	JGE  loop

remainder:
	// undo the -4 bias; AX is now the number of elements left (0 to 3)
	ADDQ $4, AX
	JE   done

	SUBQ $2, AX
	JL   remainder1head

remainder2:
	// VMOVUPD (SI), X0
	// VMOVUPD (DI), X1
	// VSUBPD X1, X0, X0
	// VMOVUPD X0, (SI)
	BYTE $0xc5; BYTE $0xf9; BYTE $0x10; BYTE $0x06 // vmovupd 0(%rsi),%xmm0
	BYTE $0xc5; BYTE $0xf9; BYTE $0x10; BYTE $0x0f // vmovupd 0(%rdi),%xmm1
	BYTE $0xc5; BYTE $0xf9; BYTE $0x5c; BYTE $0xc1 // vsubpd %xmm1,%xmm0,%xmm0
	BYTE $0xc5; BYTE $0xf9; BYTE $0x11; BYTE $0x06 // vmovupd %xmm0, 0(%rsi)

	ADDQ $16, SI
	ADDQ $16, DI
	SUBQ $2, AX
	JGE  remainder2

remainder1head:
	// undo the -2 bias; AX is now 0 or 1
	ADDQ $2, AX
	JE   done

remainder1:
	// copy into the appropriate registers
	// VMOVSD (SI), X0
	// VMOVSD (DI), X1
	// VSUBSD X1, X0, X0
	// VMOVSD X0, (SI)
	BYTE $0xc5; BYTE $0xfb; BYTE $0x10; BYTE $0x06 // vmovsd 0(%rsi),%xmm0
	BYTE $0xc5; BYTE $0xfb; BYTE $0x10; BYTE $0x0f // vmovsd 0(%rdi),%xmm1
	BYTE $0xc5; BYTE $0xfb; BYTE $0x5c; BYTE $0xc1 // vsubsd %xmm1,%xmm0,%xmm0
	BYTE $0xc5; BYTE $0xfb; BYTE $0x11; BYTE $0x06 // vmovsd %xmm0, 0(%rsi)

	// update pointer to the top of the data
	ADDQ $8, SI
	ADDQ $8, DI

	DECQ AX
	JNE  remainder1

done:
	RET
vecf64-0.9.0/asm_vecSub_sse.s000066400000000000000000000021351353362416300157510ustar00rootroot00000000000000// +build sse
// +build amd64

#include "textflag.h"

// func subAsm(a, b []float64)
//
// subAsm computes a[i] -= b[i] in place for every i in [0, len(a)), using SSE2.
// Only len(a) is read; the length of b is never loaded, so the caller
// presumably guarantees len(b) >= len(a) - TODO confirm against the Go wrapper.
TEXT ·subAsm(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ b_data+24(FP), DI // use destination index register for this
	MOVQ a_len+8(FP), AX   // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap

	SUBQ $8, AX // 8 items or more?
	JL   remainder

loop:
	// a[0] to a[7], two elements per XMM register.
	// MOVUPD rather than MOVAPD: MOVAPD faults when the address is not 16-byte
	// aligned, and slice data is only known to be 8-byte aligned (e.g. a[1:]).
	MOVUPD (SI), X0
	MOVUPD (DI), X1
	SUBPD  X1, X0
	MOVUPD X0, (SI)

	MOVUPD 16(SI), X2
	MOVUPD 16(DI), X3
	SUBPD  X3, X2
	MOVUPD X2, 16(SI)

	MOVUPD 32(SI), X4
	MOVUPD 32(DI), X5
	SUBPD  X5, X4
	MOVUPD X4, 32(SI)

	MOVUPD 48(SI), X6
	MOVUPD 48(DI), X7
	SUBPD  X7, X6
	MOVUPD X6, 48(SI)

	// update pointers (4 * 2 * 8) - 4 registers, 2 elements each, each element is 8 bytes
	ADDQ $64, SI
	ADDQ $64, DI

	// len(a) is now 8 less
	SUBQ $8, AX
	JGE  loop

remainder:
	// undo the -8 bias; AX is now the number of elements left (0 to 7)
	ADDQ $8, AX
	JE   done

remainderloop:
	// copy into the appropriate registers
	MOVSD (SI), X0
	MOVSD (DI), X1
	SUBSD X1, X0

	// save it back
	MOVSD X0, (SI)

	// update pointer to the top of the data
	ADDQ $8, SI
	ADDQ $8, DI

	DECQ AX
	JNE  remainderloop

done:
	RET
vecf64-0.9.0/bench.sh000077500000000000000000000011321353362416300142360ustar00rootroot00000000000000set -ex

benchtime=${1:-1s}

go test -bench . -benchtime $benchtime
go test -tags='sse' -bench . -benchtime $benchtime
go test -tags='avx' -bench . -benchtime $benchtime

# travis compiles commands in script and then executes in bash. By adding
# set -e we are changing the travis build script's behavior, and the set
# -e lives on past the commands we are providing it. Some of the travis
# commands are supposed to exit with non zero status, but then continue
# executing. set -x makes the travis log files extremely verbose and
# difficult to understand.
#
# see travis-ci/travis-ci#5120
set +ex
vecf64-0.9.0/doc.go000066400000000000000000000054431353362416300137250ustar00rootroot00000000000000// Package vecf64 provides common functions and methods for slices of float64.
//
// Name
//
// In the days of yore, scientists who computed with computers would use arrays to represent vectors, each value representing
// magnitude and/or direction. Then came the C++ Standard Template Library, which sought to provide this data type in the standard
// library. Now, everyone conflates the term "vector" with dynamic arrays.
// // In the C++ book, Bjarne Stroustrup has this to say: // One could argue that valarray should have been called vector because it is a traditional mathematical vector // and that vector should have been called array. // However, this is not the way that the terminology evolved. // A valarray is a vector optimized for numeric computation; // a vector is a flexible container designed for holding and manipulating objects of a wide variety of types; // and an array is a low-level built-in type // // Go has a better name for representing dynamically allocated arrays of any type - "slice". However, "slice" is both a noun and verb // and many libraries that I use already use "slice"-as-a-verb as a name, so I had to settle for the second best name: "vector". // // It should be noted that while the names used in this package were definitely mathematically inspired, they bear only little resemblance // to the actual mathematical operations performed. // // Naming Convention // // The names of the operations assume you're working with slices of float64s. Hence `Add` performs elementwise addition between two []float64. // // Operations between []float64 and float64 are also supported, however they are differently named. Here are the equivalents: /* +------------------------+--------------------------------------+ | []float64-[]float64 Op | []float64-float64 Op | +------------------------+--------------------------------------+ | Add(a, b []float64) | Trans(a float64, b []float64) | | Sub(a, b []float64) | Trans/TransR(a float64, b []float64) | | Mul(a, b []float64) | Scale(a float64, b []float64) | | Div(a, b []float64) | Scale/DivR(a float64, b []float64) | | Pow(a, b []float64) | PowOf/PowOfR(a float64, b []float64) | +------------------------+--------------------------------------+ */ // You may note that for the []float64 - float64 binary operations, the scalar (float64) is always the first operand. 
In operations // that are not commutative, an additional function is provided, suffixed with "R" (for reverse) // // Range Check and BCE // // This package does not provide range checking. If indices are out of range, the functions will panic. This package should play well with BCE. // // TODO: provide SIMD vectorization for Incr and []float32-float64 functions. // Pull requests accepted package vecf64 // import "gorgonia.org/vecf64" vecf64-0.9.0/final.cover000066400000000000000000000375421353362416300147670ustar00rootroot00000000000000mode: set github.com/NDari/vecf64/arith.go:7.26,10.22 3 1 github.com/NDari/vecf64/arith.go:10.22,11.15 1 1 github.com/NDari/vecf64/arith.go:12.10,13.21 1 1 github.com/NDari/vecf64/arith.go:14.10,15.12 1 1 github.com/NDari/vecf64/arith.go:16.10,17.16 1 1 github.com/NDari/vecf64/arith.go:18.10,19.20 1 1 github.com/NDari/vecf64/arith.go:20.11,21.28 1 1 github.com/NDari/vecf64/arith.go:26.26,29.22 3 0 github.com/NDari/vecf64/arith.go:29.22,31.3 1 0 github.com/NDari/vecf64/arith.go:36.36,37.22 1 1 github.com/NDari/vecf64/arith.go:37.22,39.3 1 1 github.com/NDari/vecf64/arith.go:44.39,46.2 1 1 github.com/NDari/vecf64/arith.go:50.40,51.22 1 1 github.com/NDari/vecf64/arith.go:51.22,53.3 1 1 github.com/NDari/vecf64/arith.go:58.36,59.22 1 1 github.com/NDari/vecf64/arith.go:59.22,61.3 1 1 github.com/NDari/vecf64/arith.go:66.39,68.2 1 1 github.com/NDari/vecf64/arith.go:72.40,73.22 1 1 github.com/NDari/vecf64/arith.go:73.22,75.3 1 1 github.com/NDari/vecf64/arith.go:80.36,81.22 1 1 github.com/NDari/vecf64/arith.go:81.22,83.3 1 1 github.com/NDari/vecf64/arith.go:88.37,89.22 1 1 github.com/NDari/vecf64/arith.go:89.22,91.3 1 1 github.com/NDari/vecf64/arith.go:95.26,99.22 3 1 github.com/NDari/vecf64/arith.go:99.22,101.13 2 1 github.com/NDari/vecf64/arith.go:101.13,103.4 1 1 github.com/NDari/vecf64/arith.go:108.26,112.22 3 1 github.com/NDari/vecf64/arith.go:112.22,114.13 2 1 github.com/NDari/vecf64/arith.go:114.13,116.4 1 1 
github.com/NDari/vecf64/arith.go:123.31,125.2 1 1 github.com/NDari/vecf64/arith.go:128.42,129.16 1 1 github.com/NDari/vecf64/arith.go:132.2,132.36 1 1 github.com/NDari/vecf64/arith.go:129.16,130.49 1 1 github.com/NDari/vecf64/arith.go:136.42,137.16 1 1 github.com/NDari/vecf64/arith.go:140.2,140.36 1 1 github.com/NDari/vecf64/arith.go:137.16,138.49 1 1 github.com/NDari/vecf64/arith.go:144.30,148.22 4 1 github.com/NDari/vecf64/arith.go:169.2,169.12 1 1 github.com/NDari/vecf64/arith.go:148.22,149.11 1 1 github.com/NDari/vecf64/arith.go:158.3,158.40 1 1 github.com/NDari/vecf64/arith.go:164.3,164.12 1 1 github.com/NDari/vecf64/arith.go:149.11,154.12 4 1 github.com/NDari/vecf64/arith.go:158.40,161.9 3 1 github.com/NDari/vecf64/arith.go:164.12,167.4 2 1 github.com/NDari/vecf64/arith.go:173.30,177.22 4 1 github.com/NDari/vecf64/arith.go:198.2,198.12 1 1 github.com/NDari/vecf64/arith.go:177.22,178.11 1 1 github.com/NDari/vecf64/arith.go:187.3,187.41 1 1 github.com/NDari/vecf64/arith.go:193.3,193.12 1 1 github.com/NDari/vecf64/arith.go:178.11,183.12 4 1 github.com/NDari/vecf64/arith.go:187.41,190.9 3 1 github.com/NDari/vecf64/arith.go:193.12,196.4 2 1 github.com/NDari/vecf64/arith.go:204.35,204.51 1 1 github.com/NDari/vecf64/arith.go:210.35,211.12 1 1 github.com/NDari/vecf64/arith.go:214.3,214.11 1 1 github.com/NDari/vecf64/arith.go:211.12,213.4 1 1 github.com/NDari/vecf64/arith.go:217.35,218.12 1 1 github.com/NDari/vecf64/arith.go:221.3,221.11 1 1 github.com/NDari/vecf64/arith.go:218.12,220.4 1 1 github.com/NDari/vecf64/go.go:8.26,11.22 3 1 github.com/NDari/vecf64/go.go:11.22,13.3 1 1 github.com/NDari/vecf64/go.go:17.26,20.22 3 1 github.com/NDari/vecf64/go.go:20.22,22.3 1 1 github.com/NDari/vecf64/go.go:26.26,29.22 3 1 github.com/NDari/vecf64/go.go:29.22,31.3 1 1 github.com/NDari/vecf64/go.go:35.26,38.22 3 1 github.com/NDari/vecf64/go.go:38.22,39.16 1 1 github.com/NDari/vecf64/go.go:44.3,44.18 1 1 github.com/NDari/vecf64/go.go:39.16,41.12 2 1 
github.com/NDari/vecf64/go.go:49.24,50.22 1 1 github.com/NDari/vecf64/go.go:50.22,52.3 1 1 github.com/NDari/vecf64/go.go:56.27,57.22 1 1 github.com/NDari/vecf64/go.go:57.22,59.3 1 1 github.com/NDari/vecf64/incr.go:6.36,10.22 4 1 github.com/NDari/vecf64/incr.go:10.22,12.3 1 1 github.com/NDari/vecf64/incr.go:16.36,20.22 4 1 github.com/NDari/vecf64/incr.go:20.22,22.3 1 1 github.com/NDari/vecf64/incr.go:26.36,30.22 4 1 github.com/NDari/vecf64/incr.go:30.22,32.3 1 1 github.com/NDari/vecf64/incr.go:35.36,39.22 4 1 github.com/NDari/vecf64/incr.go:39.22,40.16 1 1 github.com/NDari/vecf64/incr.go:44.3,44.22 1 1 github.com/NDari/vecf64/incr.go:40.16,42.12 2 1 github.com/NDari/vecf64/incr.go:49.36,53.22 4 1 github.com/NDari/vecf64/incr.go:53.22,54.15 1 1 github.com/NDari/vecf64/incr.go:55.10,56.13 1 1 github.com/NDari/vecf64/incr.go:57.10,58.16 1 1 github.com/NDari/vecf64/incr.go:59.10,60.20 1 1 github.com/NDari/vecf64/incr.go:61.10,62.24 1 1 github.com/NDari/vecf64/incr.go:63.11,64.32 1 1 github.com/NDari/vecf64/incr.go:70.36,75.22 4 0 github.com/NDari/vecf64/incr.go:75.22,77.3 1 0 github.com/NDari/vecf64/incr.go:82.56,85.22 3 1 github.com/NDari/vecf64/incr.go:85.22,87.3 1 1 github.com/NDari/vecf64/incr.go:92.59,94.2 1 1 github.com/NDari/vecf64/incr.go:98.60,101.22 3 1 github.com/NDari/vecf64/incr.go:101.22,103.3 1 1 github.com/NDari/vecf64/incr.go:108.56,111.22 3 1 github.com/NDari/vecf64/incr.go:111.22,113.3 1 1 github.com/NDari/vecf64/incr.go:118.59,120.2 1 1 github.com/NDari/vecf64/incr.go:124.60,127.22 3 1 github.com/NDari/vecf64/incr.go:127.22,129.3 1 1 github.com/NDari/vecf64/incr.go:134.56,137.22 3 1 github.com/NDari/vecf64/incr.go:137.22,139.3 1 1 github.com/NDari/vecf64/incr.go:144.57,147.22 3 1 github.com/NDari/vecf64/incr.go:147.22,149.3 1 1 github.com/NDari/vecf64/utils.go:4.38,7.17 3 1 github.com/NDari/vecf64/utils.go:12.2,12.14 1 1 github.com/NDari/vecf64/utils.go:16.2,17.47 2 1 github.com/NDari/vecf64/utils.go:26.2,26.10 1 1 
github.com/NDari/vecf64/utils.go:7.17,10.3 2 0 github.com/NDari/vecf64/utils.go:12.14,13.64 1 0 github.com/NDari/vecf64/utils.go:17.47,20.11 2 1 github.com/NDari/vecf64/utils.go:20.11,22.4 1 1 github.com/NDari/vecf64/utils.go:22.4,24.4 1 0 github.com/NDari/vecf64/utils.go:30.87,32.17 2 1 github.com/NDari/vecf64/utils.go:36.2,36.22 1 1 github.com/NDari/vecf64/utils.go:39.2,39.8 1 1 github.com/NDari/vecf64/utils.go:32.17,34.3 1 0 github.com/NDari/vecf64/utils.go:36.22,38.3 1 1 github.com/NDari/vecf64/arith.go:7.26,10.22 3 1 github.com/NDari/vecf64/arith.go:10.22,11.15 1 1 github.com/NDari/vecf64/arith.go:12.10,13.21 1 1 github.com/NDari/vecf64/arith.go:14.10,15.12 1 1 github.com/NDari/vecf64/arith.go:16.10,17.16 1 1 github.com/NDari/vecf64/arith.go:18.10,19.20 1 1 github.com/NDari/vecf64/arith.go:20.11,21.28 1 1 github.com/NDari/vecf64/arith.go:26.26,29.22 3 0 github.com/NDari/vecf64/arith.go:29.22,31.3 1 0 github.com/NDari/vecf64/arith.go:36.36,37.22 1 1 github.com/NDari/vecf64/arith.go:37.22,39.3 1 1 github.com/NDari/vecf64/arith.go:44.39,46.2 1 1 github.com/NDari/vecf64/arith.go:50.40,51.22 1 1 github.com/NDari/vecf64/arith.go:51.22,53.3 1 1 github.com/NDari/vecf64/arith.go:58.36,59.22 1 1 github.com/NDari/vecf64/arith.go:59.22,61.3 1 1 github.com/NDari/vecf64/arith.go:66.39,68.2 1 1 github.com/NDari/vecf64/arith.go:72.40,73.22 1 1 github.com/NDari/vecf64/arith.go:73.22,75.3 1 1 github.com/NDari/vecf64/arith.go:80.36,81.22 1 1 github.com/NDari/vecf64/arith.go:81.22,83.3 1 1 github.com/NDari/vecf64/arith.go:88.37,89.22 1 1 github.com/NDari/vecf64/arith.go:89.22,91.3 1 1 github.com/NDari/vecf64/arith.go:95.26,99.22 3 1 github.com/NDari/vecf64/arith.go:99.22,101.13 2 1 github.com/NDari/vecf64/arith.go:101.13,103.4 1 1 github.com/NDari/vecf64/arith.go:108.26,112.22 3 1 github.com/NDari/vecf64/arith.go:112.22,114.13 2 1 github.com/NDari/vecf64/arith.go:114.13,116.4 1 1 github.com/NDari/vecf64/arith.go:123.31,125.2 1 1 github.com/NDari/vecf64/arith.go:128.42,129.16 1 1 
github.com/NDari/vecf64/arith.go:132.2,132.36 1 1 github.com/NDari/vecf64/arith.go:129.16,130.49 1 1 github.com/NDari/vecf64/arith.go:136.42,137.16 1 1 github.com/NDari/vecf64/arith.go:140.2,140.36 1 1 github.com/NDari/vecf64/arith.go:137.16,138.49 1 1 github.com/NDari/vecf64/arith.go:144.30,148.22 4 1 github.com/NDari/vecf64/arith.go:169.2,169.12 1 1 github.com/NDari/vecf64/arith.go:148.22,149.11 1 1 github.com/NDari/vecf64/arith.go:158.3,158.40 1 1 github.com/NDari/vecf64/arith.go:164.3,164.12 1 1 github.com/NDari/vecf64/arith.go:149.11,154.12 4 1 github.com/NDari/vecf64/arith.go:158.40,161.9 3 1 github.com/NDari/vecf64/arith.go:164.12,167.4 2 1 github.com/NDari/vecf64/arith.go:173.30,177.22 4 1 github.com/NDari/vecf64/arith.go:198.2,198.12 1 1 github.com/NDari/vecf64/arith.go:177.22,178.11 1 1 github.com/NDari/vecf64/arith.go:187.3,187.41 1 1 github.com/NDari/vecf64/arith.go:193.3,193.12 1 1 github.com/NDari/vecf64/arith.go:178.11,183.12 4 1 github.com/NDari/vecf64/arith.go:187.41,190.9 3 1 github.com/NDari/vecf64/arith.go:193.12,196.4 2 1 github.com/NDari/vecf64/arith.go:204.35,204.51 1 1 github.com/NDari/vecf64/arith.go:210.35,211.12 1 1 github.com/NDari/vecf64/arith.go:214.3,214.11 1 1 github.com/NDari/vecf64/arith.go:211.12,213.4 1 1 github.com/NDari/vecf64/arith.go:217.35,218.12 1 1 github.com/NDari/vecf64/arith.go:221.3,221.11 1 1 github.com/NDari/vecf64/arith.go:218.12,220.4 1 1 github.com/NDari/vecf64/incr.go:6.36,10.22 4 1 github.com/NDari/vecf64/incr.go:10.22,12.3 1 1 github.com/NDari/vecf64/incr.go:16.36,20.22 4 1 github.com/NDari/vecf64/incr.go:20.22,22.3 1 1 github.com/NDari/vecf64/incr.go:26.36,30.22 4 1 github.com/NDari/vecf64/incr.go:30.22,32.3 1 1 github.com/NDari/vecf64/incr.go:35.36,39.22 4 1 github.com/NDari/vecf64/incr.go:39.22,40.16 1 1 github.com/NDari/vecf64/incr.go:44.3,44.22 1 1 github.com/NDari/vecf64/incr.go:40.16,42.12 2 1 github.com/NDari/vecf64/incr.go:49.36,53.22 4 1 github.com/NDari/vecf64/incr.go:53.22,54.15 1 1 
github.com/NDari/vecf64/incr.go:55.10,56.13 1 1 github.com/NDari/vecf64/incr.go:57.10,58.16 1 1 github.com/NDari/vecf64/incr.go:59.10,60.20 1 1 github.com/NDari/vecf64/incr.go:61.10,62.24 1 1 github.com/NDari/vecf64/incr.go:63.11,64.32 1 1 github.com/NDari/vecf64/incr.go:70.36,75.22 4 0 github.com/NDari/vecf64/incr.go:75.22,77.3 1 0 github.com/NDari/vecf64/incr.go:82.56,85.22 3 1 github.com/NDari/vecf64/incr.go:85.22,87.3 1 1 github.com/NDari/vecf64/incr.go:92.59,94.2 1 1 github.com/NDari/vecf64/incr.go:98.60,101.22 3 1 github.com/NDari/vecf64/incr.go:101.22,103.3 1 1 github.com/NDari/vecf64/incr.go:108.56,111.22 3 1 github.com/NDari/vecf64/incr.go:111.22,113.3 1 1 github.com/NDari/vecf64/incr.go:118.59,120.2 1 1 github.com/NDari/vecf64/incr.go:124.60,127.22 3 1 github.com/NDari/vecf64/incr.go:127.22,129.3 1 1 github.com/NDari/vecf64/incr.go:134.56,137.22 3 1 github.com/NDari/vecf64/incr.go:137.22,139.3 1 1 github.com/NDari/vecf64/incr.go:144.57,147.22 3 1 github.com/NDari/vecf64/incr.go:147.22,149.3 1 1 github.com/NDari/vecf64/utils.go:4.38,7.17 3 1 github.com/NDari/vecf64/utils.go:12.2,12.14 1 1 github.com/NDari/vecf64/utils.go:16.2,17.47 2 1 github.com/NDari/vecf64/utils.go:26.2,26.10 1 1 github.com/NDari/vecf64/utils.go:7.17,10.3 2 0 github.com/NDari/vecf64/utils.go:12.14,13.64 1 0 github.com/NDari/vecf64/utils.go:17.47,20.11 2 1 github.com/NDari/vecf64/utils.go:20.11,22.4 1 1 github.com/NDari/vecf64/utils.go:22.4,24.4 1 0 github.com/NDari/vecf64/utils.go:30.87,32.17 2 1 github.com/NDari/vecf64/utils.go:36.2,36.22 1 1 github.com/NDari/vecf64/utils.go:39.2,39.8 1 1 github.com/NDari/vecf64/utils.go:32.17,34.3 1 0 github.com/NDari/vecf64/utils.go:36.22,38.3 1 1 github.com/NDari/vecf64/arith.go:7.26,10.22 3 1 github.com/NDari/vecf64/arith.go:10.22,11.15 1 1 github.com/NDari/vecf64/arith.go:12.10,13.21 1 1 github.com/NDari/vecf64/arith.go:14.10,15.12 1 1 github.com/NDari/vecf64/arith.go:16.10,17.16 1 1 github.com/NDari/vecf64/arith.go:18.10,19.20 1 1 
github.com/NDari/vecf64/arith.go:20.11,21.28 1 1 github.com/NDari/vecf64/arith.go:26.26,29.22 3 0 github.com/NDari/vecf64/arith.go:29.22,31.3 1 0 github.com/NDari/vecf64/arith.go:36.36,37.22 1 1 github.com/NDari/vecf64/arith.go:37.22,39.3 1 1 github.com/NDari/vecf64/arith.go:44.39,46.2 1 1 github.com/NDari/vecf64/arith.go:50.40,51.22 1 1 github.com/NDari/vecf64/arith.go:51.22,53.3 1 1 github.com/NDari/vecf64/arith.go:58.36,59.22 1 1 github.com/NDari/vecf64/arith.go:59.22,61.3 1 1 github.com/NDari/vecf64/arith.go:66.39,68.2 1 1 github.com/NDari/vecf64/arith.go:72.40,73.22 1 1 github.com/NDari/vecf64/arith.go:73.22,75.3 1 1 github.com/NDari/vecf64/arith.go:80.36,81.22 1 1 github.com/NDari/vecf64/arith.go:81.22,83.3 1 1 github.com/NDari/vecf64/arith.go:88.37,89.22 1 1 github.com/NDari/vecf64/arith.go:89.22,91.3 1 1 github.com/NDari/vecf64/arith.go:95.26,99.22 3 1 github.com/NDari/vecf64/arith.go:99.22,101.13 2 1 github.com/NDari/vecf64/arith.go:101.13,103.4 1 1 github.com/NDari/vecf64/arith.go:108.26,112.22 3 1 github.com/NDari/vecf64/arith.go:112.22,114.13 2 1 github.com/NDari/vecf64/arith.go:114.13,116.4 1 1 github.com/NDari/vecf64/arith.go:123.31,125.2 1 1 github.com/NDari/vecf64/arith.go:128.42,129.16 1 1 github.com/NDari/vecf64/arith.go:132.2,132.36 1 1 github.com/NDari/vecf64/arith.go:129.16,130.49 1 1 github.com/NDari/vecf64/arith.go:136.42,137.16 1 1 github.com/NDari/vecf64/arith.go:140.2,140.36 1 1 github.com/NDari/vecf64/arith.go:137.16,138.49 1 1 github.com/NDari/vecf64/arith.go:144.30,148.22 4 1 github.com/NDari/vecf64/arith.go:169.2,169.12 1 1 github.com/NDari/vecf64/arith.go:148.22,149.11 1 1 github.com/NDari/vecf64/arith.go:158.3,158.40 1 1 github.com/NDari/vecf64/arith.go:164.3,164.12 1 1 github.com/NDari/vecf64/arith.go:149.11,154.12 4 1 github.com/NDari/vecf64/arith.go:158.40,161.9 3 1 github.com/NDari/vecf64/arith.go:164.12,167.4 2 1 github.com/NDari/vecf64/arith.go:173.30,177.22 4 1 github.com/NDari/vecf64/arith.go:198.2,198.12 1 1 
github.com/NDari/vecf64/arith.go:177.22,178.11 1 1 github.com/NDari/vecf64/arith.go:187.3,187.41 1 1 github.com/NDari/vecf64/arith.go:193.3,193.12 1 1 github.com/NDari/vecf64/arith.go:178.11,183.12 4 1 github.com/NDari/vecf64/arith.go:187.41,190.9 3 1 github.com/NDari/vecf64/arith.go:193.12,196.4 2 1 github.com/NDari/vecf64/arith.go:204.35,204.51 1 1 github.com/NDari/vecf64/arith.go:210.35,211.12 1 1 github.com/NDari/vecf64/arith.go:214.3,214.11 1 1 github.com/NDari/vecf64/arith.go:211.12,213.4 1 1 github.com/NDari/vecf64/arith.go:217.35,218.12 1 1 github.com/NDari/vecf64/arith.go:221.3,221.11 1 1 github.com/NDari/vecf64/arith.go:218.12,220.4 1 1 github.com/NDari/vecf64/incr.go:6.36,10.22 4 1 github.com/NDari/vecf64/incr.go:10.22,12.3 1 1 github.com/NDari/vecf64/incr.go:16.36,20.22 4 1 github.com/NDari/vecf64/incr.go:20.22,22.3 1 1 github.com/NDari/vecf64/incr.go:26.36,30.22 4 1 github.com/NDari/vecf64/incr.go:30.22,32.3 1 1 github.com/NDari/vecf64/incr.go:35.36,39.22 4 1 github.com/NDari/vecf64/incr.go:39.22,40.16 1 1 github.com/NDari/vecf64/incr.go:44.3,44.22 1 1 github.com/NDari/vecf64/incr.go:40.16,42.12 2 1 github.com/NDari/vecf64/incr.go:49.36,53.22 4 1 github.com/NDari/vecf64/incr.go:53.22,54.15 1 1 github.com/NDari/vecf64/incr.go:55.10,56.13 1 1 github.com/NDari/vecf64/incr.go:57.10,58.16 1 1 github.com/NDari/vecf64/incr.go:59.10,60.20 1 1 github.com/NDari/vecf64/incr.go:61.10,62.24 1 1 github.com/NDari/vecf64/incr.go:63.11,64.32 1 1 github.com/NDari/vecf64/incr.go:70.36,75.22 4 0 github.com/NDari/vecf64/incr.go:75.22,77.3 1 0 github.com/NDari/vecf64/incr.go:82.56,85.22 3 1 github.com/NDari/vecf64/incr.go:85.22,87.3 1 1 github.com/NDari/vecf64/incr.go:92.59,94.2 1 1 github.com/NDari/vecf64/incr.go:98.60,101.22 3 1 github.com/NDari/vecf64/incr.go:101.22,103.3 1 1 github.com/NDari/vecf64/incr.go:108.56,111.22 3 1 github.com/NDari/vecf64/incr.go:111.22,113.3 1 1 github.com/NDari/vecf64/incr.go:118.59,120.2 1 1 github.com/NDari/vecf64/incr.go:124.60,127.22 3 1 
github.com/NDari/vecf64/incr.go:127.22,129.3 1 1 github.com/NDari/vecf64/incr.go:134.56,137.22 3 1 github.com/NDari/vecf64/incr.go:137.22,139.3 1 1 github.com/NDari/vecf64/incr.go:144.57,147.22 3 1 github.com/NDari/vecf64/incr.go:147.22,149.3 1 1 github.com/NDari/vecf64/utils.go:4.38,7.17 3 1 github.com/NDari/vecf64/utils.go:12.2,12.14 1 1 github.com/NDari/vecf64/utils.go:16.2,17.47 2 1 github.com/NDari/vecf64/utils.go:26.2,26.10 1 1 github.com/NDari/vecf64/utils.go:7.17,10.3 2 0 github.com/NDari/vecf64/utils.go:12.14,13.64 1 0 github.com/NDari/vecf64/utils.go:17.47,20.11 2 1 github.com/NDari/vecf64/utils.go:20.11,22.4 1 1 github.com/NDari/vecf64/utils.go:22.4,24.4 1 0 github.com/NDari/vecf64/utils.go:30.87,32.17 2 1 github.com/NDari/vecf64/utils.go:36.2,36.22 1 1 github.com/NDari/vecf64/utils.go:39.2,39.8 1 1 github.com/NDari/vecf64/utils.go:32.17,34.3 1 0 github.com/NDari/vecf64/utils.go:36.22,38.3 1 1 vecf64-0.9.0/go.go000066400000000000000000000016741353362416300135670ustar00rootroot00000000000000// +build !avx,!sse package vecf64 import "math" // Add performs a̅ + b̅. a̅ will be clobbered func Add(a, b []float64) { b = b[:len(a)] for i, v := range a { a[i] = v + b[i] } } // Sub performs a̅ - b̅. a̅ will be clobbered func Sub(a, b []float64) { b = b[:len(a)] for i, v := range a { a[i] = v - b[i] } } // Mul performs a̅ × b̅. a̅ will be clobbered func Mul(a, b []float64) { b = b[:len(a)] for i, v := range a { a[i] = v * b[i] } } // Div performs a̅ ÷ b̅. a̅ will be clobbered func Div(a, b []float64) { b = b[:len(a)] for i, v := range a { if b[i] == 0 { a[i] = math.Inf(0) continue } a[i] = v / b[i] } } // Sqrt performs √a̅ elementwise. a̅ will be clobbered func Sqrt(a []float64) { for i, v := range a { a[i] = math.Sqrt(v) } } // InvSqrt performs 1/√a̅ elementwise. 
a̅ will be clobbered func InvSqrt(a []float64) { for i, v := range a { a[i] = float64(1) / math.Sqrt(v) } } vecf64-0.9.0/go.mod000066400000000000000000000002361353362416300137320ustar00rootroot00000000000000module gorgonia.org/vecf64 go 1.13 require ( github.com/davecgh/go-spew v1.1.0 github.com/pmezard/go-difflib v1.0.0 github.com/stretchr/testify v1.1.4 ) vecf64-0.9.0/go.sum000066400000000000000000000010111353362416300137470ustar00rootroot00000000000000github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.1.4 h1:ToftOQTytwshuOSj6bDSolVUa3GINfJP/fg3OkkOzQQ= github.com/stretchr/testify v1.1.4/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= vecf64-0.9.0/go_test.go000066400000000000000000000050621353362416300146210ustar00rootroot00000000000000// +build !sse // +build !avx package vecf64 /* IMPORTANT NOTE: Currently Div does not handle division by zero correctly. 
It returns a NaN instead of +Inf */ import ( "math" "testing" "unsafe" "github.com/stretchr/testify/assert" ) func TestDiv(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime-1) correct := Range(0, niceprime-1) for i := range correct { correct[i] = 1 } Div(a, a) assert.Equal(correct[1:], a[1:]) assert.Equal(true, math.IsInf(a[0], 0), "a[0] is: %v", a[0]) b := Range(niceprime, 2*niceprime-1) for i := range correct { correct[i] = a[i] / b[i] } Div(a, b) assert.Equal(correct[1:], a[1:]) assert.Equal(true, math.IsInf(a[0], 0), "a[0] is: %v", a[0]) /* Weird Corner Cases*/ for i := 1; i < 65; i++ { a = Range(0, i) var testAlign bool addr := &a[0] u := uint(uintptr(unsafe.Pointer(addr))) if u&uint(32) != 0 { testAlign = true } if testAlign { b = Range(i, 2*i) correct = make([]float64, i) for j := range correct { correct[j] = a[j] / b[j] } Div(a, b) assert.Equal(correct[1:], a[1:]) } } } func TestSqrt(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime-1) correct := Range(0, niceprime-1) for i, v := range correct { correct[i] = math.Sqrt(v) } Sqrt(a) assert.Equal(correct, a) // negatives a = []float64{-1, -2, -3, -4} Sqrt(a) for _, v := range a { if !math.IsNaN(v) { t.Error("Expected NaN") } } /* Weird Corner Cases*/ for i := 1; i < 65; i++ { a = Range(0, i) var testAlign bool addr := &a[0] u := uint(uintptr(unsafe.Pointer(addr))) if u&uint(32) != 0 { testAlign = true } if testAlign { correct = make([]float64, i) for j := range correct { correct[j] = math.Sqrt(a[j]) } Sqrt(a) assert.Equal(correct, a) } } } func TestInvSqrt(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime-1) correct := Range(0, niceprime-1) for i, v := range correct { correct[i] = 1.0 / math.Sqrt(v) } InvSqrt(a) assert.Equal(correct[1:], a[1:]) if !math.IsInf(a[0], 0) { t.Error("1/0 should be +Inf or -Inf") } // Weird Corner Cases for i := 1; i < 65; i++ { a = Range(0, i) var testAlign bool addr := &a[0] u := uint(uintptr(unsafe.Pointer(addr))) if u&uint(32) != 0 { 
testAlign = true } if testAlign { correct = make([]float64, i) for j := range correct { correct[j] = 1.0 / math.Sqrt(a[j]) } InvSqrt(a) assert.Equal(correct[1:], a[1:], "i = %d, %v", i, Range(0, i)) if !math.IsInf(a[0], 0) { t.Error("1/0 should be +Inf or -Inf") } } } } vecf64-0.9.0/incr.go000066400000000000000000000062361353362416300141140ustar00rootroot00000000000000package vecf64 import "math" // IncrAdd performs a̅ + b̅ and then adds it elementwise to the incr slice func IncrAdd(a, b, incr []float64) { b = b[:len(a)] incr = incr[:len(a)] for i, v := range a { incr[i] += v + b[i] } } // IncrSub performs a̅ = b̅ and then adds it elementwise to the incr slice func IncrSub(a, b, incr []float64) { b = b[:len(a)] incr = incr[:len(a)] for i, v := range a { incr[i] += v - b[i] } } // IncrMul performs a̅ × b̅ and then adds it elementwise to the incr slice func IncrMul(a, b, incr []float64) { b = b[:len(a)] incr = incr[:len(a)] for i, v := range a { incr[i] += v * b[i] } } func IncrDiv(a, b, incr []float64) { b = b[:len(a)] incr = incr[:len(a)] for i, v := range a { if b[i] == 0 { incr[i] = math.Inf(0) continue } incr[i] += v / b[i] } } // IncrDiv performs a̅ ÷ b̅. 
a̅ will be clobbered func IncrPow(a, b, incr []float64) { b = b[:len(a)] incr = incr[:len(a)] for i, v := range a { switch b[i] { case 0: incr[i]++ case 1: incr[i] += v case 2: incr[i] += v * v case 3: incr[i] += v * v * v default: incr[i] += math.Pow(v, b[i]) } } } // IncrMod performs a̅ % b̅ then adds it to incr func IncrMod(a, b, incr []float64) { b = b[:len(a)] incr = incr[:len(a)] for i, v := range a { incr[i] += math.Mod(v, b[i]) } } // Scale multiplies all values in the slice by the scalar and then increments the incr slice // incr += a̅ * s func IncrScale(a []float64, s float64, incr []float64) { incr = incr[:len(a)] for i, v := range a { incr[i] += v * s } } // IncrScaleInv divides all values in the slice by the scalar and then increments the incr slice // incr += a̅ / s func IncrScaleInv(a []float64, s float64, incr []float64) { IncrScale(a, 1/s, incr) } /// IncrScaleInvR divides all numbers in the slice by a scalar and then increments the incr slice // incr += s / a̅ func IncrScaleInvR(a []float64, s float64, incr []float64) { incr = incr[:len(a)] for i, v := range a { incr[i] += s / v } } // IncrTrans adds all the values in the slice by a scalar and then increments the incr slice // incr += a̅ + s func IncrTrans(a []float64, s float64, incr []float64) { incr = incr[:len(a)] for i, v := range a { incr[i] += v + s } } // IncrTransInv subtracts all the values in the slice by a scalar and then increments the incr slice // incr += a̅ - s func IncrTransInv(a []float64, s float64, incr []float64) { IncrTrans(a, -s, incr) } // IncrTransInvR subtracts all the numbers in a slice from a scalar and then increments the incr slice // incr += s - a̅ func IncrTransInvR(a []float64, s float64, incr []float64) { incr = incr[:len(a)] for i, v := range a { incr[i] += s - v } } // IncrPowOf performs elementwise power function and then increments the incr slice // incr += a̅ ^ s func IncrPowOf(a []float64, s float64, incr []float64) { incr = incr[:len(a)] for i, v := range a 
{ incr[i] += math.Pow(v, s) } } // PowOfR performs elementwise power function below and then increments the incr slice. // incr += s ^ a̅ func IncrPowOfR(a []float64, s float64, incr []float64) { incr = incr[:len(a)] for i, v := range a { incr[i] += math.Pow(s, v) } } vecf64-0.9.0/incr_test.go000066400000000000000000000105271353362416300151510ustar00rootroot00000000000000package vecf64 import ( "math" "testing" "github.com/stretchr/testify/assert" ) func makeIncr(size int) []float64 { retVal := make([]float64, size) for i := range retVal { retVal[i] = 100 } return retVal } func TestIncrAdd(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime) incr := makeIncr(len(a)) correct := Range(0, niceprime) for i := range correct { correct[i] = correct[i] + correct[i] + incr[i] } IncrAdd(a, a, incr) assert.Equal(correct, incr) b := Range(niceprime, 2*niceprime) for i := range correct { correct[i] = a[i] + b[i] + incr[i] } IncrAdd(a, b, incr) assert.Equal(correct, incr) } func TestIncrSub(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime) incr := makeIncr(len(a)) correct := make([]float64, niceprime) copy(correct, incr) IncrSub(a, a, incr) assert.Equal(correct, incr) b := Range(niceprime, 2*niceprime) for i := range correct { correct[i] = a[i] - b[i] + incr[i] } IncrSub(a, b, incr) assert.Equal(correct, incr) } func TestIncrMul(t *testing.T) { assert := assert.New(t) a := Range(0, niceprime) incr := makeIncr(len(a)) correct := Range(0, niceprime) for i := range correct { correct[i] = correct[i]*correct[i] + incr[i] } IncrMul(a, a, incr) assert.Equal(correct, incr) b := Range(niceprime, 2*niceprime) for i := range correct { correct[i] = a[i]*b[i] + incr[i] } IncrMul(a, b, incr) assert.Equal(correct, incr) } func TestIncrDiv(t *testing.T) { assert := assert.New(t) a := []float64{1, 2, 4, 8, 10} incr := makeIncr(len(a)) correct := make([]float64, len(a)) copy(correct, a) for i := range correct { correct[i] = correct[i]/correct[i] + incr[i] } IncrDiv(a, 
a, incr) assert.Equal(correct, incr) b := []float64{2, 4, 8, 16, 20} incr = makeIncr(len(a)) for i := range correct { correct[i] = a[i]/b[i] + incr[i] } IncrDiv(a, b, incr) assert.Equal(correct, incr) // division by 0 b = make([]float64, len(a)) IncrDiv(a, b, incr) for _, v := range incr { if !math.IsInf(v, 0) && !math.IsNaN(v) { t.Error("Expected Inf or NaN") } } } func TestIncrPow(t *testing.T) { a := []float64{0, 1, 2, 3, 4} b := []float64{0, 1, 2, 3, 4} incr := makeIncr(len(a)) correct := make([]float64, 5) for i := range correct { correct[i] = math.Pow(a[i], b[i]) + incr[i] } IncrPow(a, b, incr) assert.Equal(t, correct, incr) } func TestIncrScale(t *testing.T) { a := []float64{0, 1, 2, 3, 4} incr := makeIncr(len(a)) correct := make([]float64, 5) for i := range correct { correct[i] = a[i]*5 + incr[i] } IncrScale(a, 5, incr) assert.Equal(t, correct, incr) } func TestIncrScaleInv(t *testing.T) { a := []float64{0, 1, 2, 4, 6} incr := makeIncr(len(a)) correct := make([]float64, len(a)) for i := range correct { correct[i] = a[i]/2 + incr[i] } IncrScaleInv(a, 2, incr) assert.Equal(t, correct, incr) } func TestIncrScaleInvR(t *testing.T) { a := []float64{0, 1, 2, 4, 6} incr := makeIncr(len(a)) correct := make([]float64, len(a)) for i := range correct { correct[i] = 2/a[i] + incr[i] } IncrScaleInvR(a, 2, incr) assert.Equal(t, correct, incr) } func TestIncrTrans(t *testing.T) { a := []float64{1, 2, 3, 4} incr := makeIncr(len(a)) correct := make([]float64, len(a)) for i := range correct { correct[i] = a[i] + float64(1) + incr[i] } IncrTrans(a, 1, incr) assert.Equal(t, correct, incr) } func TestIncrTransInv(t *testing.T) { a := []float64{1, 2, 3, 4} incr := makeIncr(len(a)) correct := make([]float64, len(a)) for i := range correct { correct[i] = a[i] - float64(1) + incr[i] } IncrTransInv(a, 1, incr) assert.Equal(t, correct, incr) } func TestIncrTransInvR(t *testing.T) { a := []float64{1, 2, 3, 4} incr := makeIncr(len(a)) correct := make([]float64, len(a)) for i := range 
correct { correct[i] = float64(1) - a[i] + incr[i] } IncrTransInvR(a, 1, incr) assert.Equal(t, correct, incr) } func TestIncrPowOf(t *testing.T) { a := []float64{1, 2, 3, 4} incr := makeIncr(len(a)) correct := make([]float64, len(a)) for i := range correct { correct[i] = math.Pow(a[i], 5) + incr[i] } IncrPowOf(a, 5, incr) assert.Equal(t, correct, incr) } func TestIncrPowOfR(t *testing.T) { a := []float64{1, 2, 3, 4} incr := makeIncr(len(a)) correct := make([]float64, len(a)) for i := range correct { correct[i] = math.Pow(5, a[i]) + incr[i] } IncrPowOfR(a, 5, incr) assert.Equal(t, correct, incr) } vecf64-0.9.0/test.cover000066400000000000000000000133201353362416300146410ustar00rootroot00000000000000mode: set github.com/NDari/vecf64/arith.go:7.26,10.22 3 1 github.com/NDari/vecf64/arith.go:10.22,11.15 1 1 github.com/NDari/vecf64/arith.go:12.10,13.21 1 1 github.com/NDari/vecf64/arith.go:14.10,15.12 1 1 github.com/NDari/vecf64/arith.go:16.10,17.16 1 1 github.com/NDari/vecf64/arith.go:18.10,19.20 1 1 github.com/NDari/vecf64/arith.go:20.11,21.28 1 1 github.com/NDari/vecf64/arith.go:26.26,29.22 3 0 github.com/NDari/vecf64/arith.go:29.22,31.3 1 0 github.com/NDari/vecf64/arith.go:36.36,37.22 1 1 github.com/NDari/vecf64/arith.go:37.22,39.3 1 1 github.com/NDari/vecf64/arith.go:44.39,46.2 1 1 github.com/NDari/vecf64/arith.go:50.40,51.22 1 1 github.com/NDari/vecf64/arith.go:51.22,53.3 1 1 github.com/NDari/vecf64/arith.go:58.36,59.22 1 1 github.com/NDari/vecf64/arith.go:59.22,61.3 1 1 github.com/NDari/vecf64/arith.go:66.39,68.2 1 1 github.com/NDari/vecf64/arith.go:72.40,73.22 1 1 github.com/NDari/vecf64/arith.go:73.22,75.3 1 1 github.com/NDari/vecf64/arith.go:80.36,81.22 1 1 github.com/NDari/vecf64/arith.go:81.22,83.3 1 1 github.com/NDari/vecf64/arith.go:88.37,89.22 1 1 github.com/NDari/vecf64/arith.go:89.22,91.3 1 1 github.com/NDari/vecf64/arith.go:95.26,99.22 3 1 github.com/NDari/vecf64/arith.go:99.22,101.13 2 1 github.com/NDari/vecf64/arith.go:101.13,103.4 1 1 
github.com/NDari/vecf64/arith.go:108.26,112.22 3 1 github.com/NDari/vecf64/arith.go:112.22,114.13 2 1 github.com/NDari/vecf64/arith.go:114.13,116.4 1 1 github.com/NDari/vecf64/arith.go:123.31,125.2 1 1 github.com/NDari/vecf64/arith.go:128.42,129.16 1 1 github.com/NDari/vecf64/arith.go:132.2,132.36 1 1 github.com/NDari/vecf64/arith.go:129.16,130.49 1 1 github.com/NDari/vecf64/arith.go:136.42,137.16 1 1 github.com/NDari/vecf64/arith.go:140.2,140.36 1 1 github.com/NDari/vecf64/arith.go:137.16,138.49 1 1 github.com/NDari/vecf64/arith.go:144.30,148.22 4 1 github.com/NDari/vecf64/arith.go:169.2,169.12 1 1 github.com/NDari/vecf64/arith.go:148.22,149.11 1 1 github.com/NDari/vecf64/arith.go:158.3,158.40 1 1 github.com/NDari/vecf64/arith.go:164.3,164.12 1 1 github.com/NDari/vecf64/arith.go:149.11,154.12 4 1 github.com/NDari/vecf64/arith.go:158.40,161.9 3 1 github.com/NDari/vecf64/arith.go:164.12,167.4 2 1 github.com/NDari/vecf64/arith.go:173.30,177.22 4 1 github.com/NDari/vecf64/arith.go:198.2,198.12 1 1 github.com/NDari/vecf64/arith.go:177.22,178.11 1 1 github.com/NDari/vecf64/arith.go:187.3,187.41 1 1 github.com/NDari/vecf64/arith.go:193.3,193.12 1 1 github.com/NDari/vecf64/arith.go:178.11,183.12 4 1 github.com/NDari/vecf64/arith.go:187.41,190.9 3 1 github.com/NDari/vecf64/arith.go:193.12,196.4 2 1 github.com/NDari/vecf64/arith.go:204.35,204.51 1 1 github.com/NDari/vecf64/arith.go:210.35,211.12 1 1 github.com/NDari/vecf64/arith.go:214.3,214.11 1 1 github.com/NDari/vecf64/arith.go:211.12,213.4 1 1 github.com/NDari/vecf64/arith.go:217.35,218.12 1 1 github.com/NDari/vecf64/arith.go:221.3,221.11 1 1 github.com/NDari/vecf64/arith.go:218.12,220.4 1 1 github.com/NDari/vecf64/go.go:8.26,11.22 3 1 github.com/NDari/vecf64/go.go:11.22,13.3 1 1 github.com/NDari/vecf64/go.go:17.26,20.22 3 1 github.com/NDari/vecf64/go.go:20.22,22.3 1 1 github.com/NDari/vecf64/go.go:26.26,29.22 3 1 github.com/NDari/vecf64/go.go:29.22,31.3 1 1 github.com/NDari/vecf64/go.go:35.26,38.22 3 1 
github.com/NDari/vecf64/go.go:38.22,39.16 1 1 github.com/NDari/vecf64/go.go:44.3,44.18 1 1 github.com/NDari/vecf64/go.go:39.16,41.12 2 1 github.com/NDari/vecf64/go.go:49.24,50.22 1 1 github.com/NDari/vecf64/go.go:50.22,52.3 1 1 github.com/NDari/vecf64/go.go:56.27,57.22 1 1 github.com/NDari/vecf64/go.go:57.22,59.3 1 1 github.com/NDari/vecf64/incr.go:6.36,10.22 4 1 github.com/NDari/vecf64/incr.go:10.22,12.3 1 1 github.com/NDari/vecf64/incr.go:16.36,20.22 4 1 github.com/NDari/vecf64/incr.go:20.22,22.3 1 1 github.com/NDari/vecf64/incr.go:26.36,30.22 4 1 github.com/NDari/vecf64/incr.go:30.22,32.3 1 1 github.com/NDari/vecf64/incr.go:35.36,39.22 4 1 github.com/NDari/vecf64/incr.go:39.22,40.16 1 1 github.com/NDari/vecf64/incr.go:44.3,44.22 1 1 github.com/NDari/vecf64/incr.go:40.16,42.12 2 1 github.com/NDari/vecf64/incr.go:49.36,53.22 4 1 github.com/NDari/vecf64/incr.go:53.22,54.15 1 1 github.com/NDari/vecf64/incr.go:55.10,56.13 1 1 github.com/NDari/vecf64/incr.go:57.10,58.16 1 1 github.com/NDari/vecf64/incr.go:59.10,60.20 1 1 github.com/NDari/vecf64/incr.go:61.10,62.24 1 1 github.com/NDari/vecf64/incr.go:63.11,64.32 1 1 github.com/NDari/vecf64/incr.go:70.36,75.22 4 0 github.com/NDari/vecf64/incr.go:75.22,77.3 1 0 github.com/NDari/vecf64/incr.go:82.56,85.22 3 1 github.com/NDari/vecf64/incr.go:85.22,87.3 1 1 github.com/NDari/vecf64/incr.go:92.59,94.2 1 1 github.com/NDari/vecf64/incr.go:98.60,101.22 3 1 github.com/NDari/vecf64/incr.go:101.22,103.3 1 1 github.com/NDari/vecf64/incr.go:108.56,111.22 3 1 github.com/NDari/vecf64/incr.go:111.22,113.3 1 1 github.com/NDari/vecf64/incr.go:118.59,120.2 1 1 github.com/NDari/vecf64/incr.go:124.60,127.22 3 1 github.com/NDari/vecf64/incr.go:127.22,129.3 1 1 github.com/NDari/vecf64/incr.go:134.56,137.22 3 1 github.com/NDari/vecf64/incr.go:137.22,139.3 1 1 github.com/NDari/vecf64/incr.go:144.57,147.22 3 1 github.com/NDari/vecf64/incr.go:147.22,149.3 1 1 github.com/NDari/vecf64/utils.go:4.38,7.17 3 1 
github.com/NDari/vecf64/utils.go:12.2,12.14 1 1 github.com/NDari/vecf64/utils.go:16.2,17.47 2 1 github.com/NDari/vecf64/utils.go:26.2,26.10 1 1 github.com/NDari/vecf64/utils.go:7.17,10.3 2 0 github.com/NDari/vecf64/utils.go:12.14,13.64 1 0 github.com/NDari/vecf64/utils.go:17.47,20.11 2 1 github.com/NDari/vecf64/utils.go:20.11,22.4 1 1 github.com/NDari/vecf64/utils.go:22.4,24.4 1 0 github.com/NDari/vecf64/utils.go:30.87,32.17 2 1 github.com/NDari/vecf64/utils.go:36.2,36.22 1 1 github.com/NDari/vecf64/utils.go:39.2,39.8 1 1 github.com/NDari/vecf64/utils.go:32.17,34.3 1 0 github.com/NDari/vecf64/utils.go:36.22,38.3 1 1 vecf64-0.9.0/test.cover.avx000066400000000000000000000121231353362416300154360ustar00rootroot00000000000000mode: set github.com/NDari/vecf64/arith.go:7.26,10.22 3 1 github.com/NDari/vecf64/arith.go:10.22,11.15 1 1 github.com/NDari/vecf64/arith.go:12.10,13.21 1 1 github.com/NDari/vecf64/arith.go:14.10,15.12 1 1 github.com/NDari/vecf64/arith.go:16.10,17.16 1 1 github.com/NDari/vecf64/arith.go:18.10,19.20 1 1 github.com/NDari/vecf64/arith.go:20.11,21.28 1 1 github.com/NDari/vecf64/arith.go:26.26,29.22 3 0 github.com/NDari/vecf64/arith.go:29.22,31.3 1 0 github.com/NDari/vecf64/arith.go:36.36,37.22 1 1 github.com/NDari/vecf64/arith.go:37.22,39.3 1 1 github.com/NDari/vecf64/arith.go:44.39,46.2 1 1 github.com/NDari/vecf64/arith.go:50.40,51.22 1 1 github.com/NDari/vecf64/arith.go:51.22,53.3 1 1 github.com/NDari/vecf64/arith.go:58.36,59.22 1 1 github.com/NDari/vecf64/arith.go:59.22,61.3 1 1 github.com/NDari/vecf64/arith.go:66.39,68.2 1 1 github.com/NDari/vecf64/arith.go:72.40,73.22 1 1 github.com/NDari/vecf64/arith.go:73.22,75.3 1 1 github.com/NDari/vecf64/arith.go:80.36,81.22 1 1 github.com/NDari/vecf64/arith.go:81.22,83.3 1 1 github.com/NDari/vecf64/arith.go:88.37,89.22 1 1 github.com/NDari/vecf64/arith.go:89.22,91.3 1 1 github.com/NDari/vecf64/arith.go:95.26,99.22 3 1 github.com/NDari/vecf64/arith.go:99.22,101.13 2 1 
github.com/NDari/vecf64/arith.go:101.13,103.4 1 1 github.com/NDari/vecf64/arith.go:108.26,112.22 3 1 github.com/NDari/vecf64/arith.go:112.22,114.13 2 1 github.com/NDari/vecf64/arith.go:114.13,116.4 1 1 github.com/NDari/vecf64/arith.go:123.31,125.2 1 1 github.com/NDari/vecf64/arith.go:128.42,129.16 1 1 github.com/NDari/vecf64/arith.go:132.2,132.36 1 1 github.com/NDari/vecf64/arith.go:129.16,130.49 1 1 github.com/NDari/vecf64/arith.go:136.42,137.16 1 1 github.com/NDari/vecf64/arith.go:140.2,140.36 1 1 github.com/NDari/vecf64/arith.go:137.16,138.49 1 1 github.com/NDari/vecf64/arith.go:144.30,148.22 4 1 github.com/NDari/vecf64/arith.go:169.2,169.12 1 1 github.com/NDari/vecf64/arith.go:148.22,149.11 1 1 github.com/NDari/vecf64/arith.go:158.3,158.40 1 1 github.com/NDari/vecf64/arith.go:164.3,164.12 1 1 github.com/NDari/vecf64/arith.go:149.11,154.12 4 1 github.com/NDari/vecf64/arith.go:158.40,161.9 3 1 github.com/NDari/vecf64/arith.go:164.12,167.4 2 1 github.com/NDari/vecf64/arith.go:173.30,177.22 4 1 github.com/NDari/vecf64/arith.go:198.2,198.12 1 1 github.com/NDari/vecf64/arith.go:177.22,178.11 1 1 github.com/NDari/vecf64/arith.go:187.3,187.41 1 1 github.com/NDari/vecf64/arith.go:193.3,193.12 1 1 github.com/NDari/vecf64/arith.go:178.11,183.12 4 1 github.com/NDari/vecf64/arith.go:187.41,190.9 3 1 github.com/NDari/vecf64/arith.go:193.12,196.4 2 1 github.com/NDari/vecf64/arith.go:204.35,204.51 1 1 github.com/NDari/vecf64/arith.go:210.35,211.12 1 1 github.com/NDari/vecf64/arith.go:214.3,214.11 1 1 github.com/NDari/vecf64/arith.go:211.12,213.4 1 1 github.com/NDari/vecf64/arith.go:217.35,218.12 1 1 github.com/NDari/vecf64/arith.go:221.3,221.11 1 1 github.com/NDari/vecf64/arith.go:218.12,220.4 1 1 github.com/NDari/vecf64/incr.go:6.36,10.22 4 1 github.com/NDari/vecf64/incr.go:10.22,12.3 1 1 github.com/NDari/vecf64/incr.go:16.36,20.22 4 1 github.com/NDari/vecf64/incr.go:20.22,22.3 1 1 github.com/NDari/vecf64/incr.go:26.36,30.22 4 1 github.com/NDari/vecf64/incr.go:30.22,32.3 1 1 
github.com/NDari/vecf64/incr.go:35.36,39.22 4 1 github.com/NDari/vecf64/incr.go:39.22,40.16 1 1 github.com/NDari/vecf64/incr.go:44.3,44.22 1 1 github.com/NDari/vecf64/incr.go:40.16,42.12 2 1 github.com/NDari/vecf64/incr.go:49.36,53.22 4 1 github.com/NDari/vecf64/incr.go:53.22,54.15 1 1 github.com/NDari/vecf64/incr.go:55.10,56.13 1 1 github.com/NDari/vecf64/incr.go:57.10,58.16 1 1 github.com/NDari/vecf64/incr.go:59.10,60.20 1 1 github.com/NDari/vecf64/incr.go:61.10,62.24 1 1 github.com/NDari/vecf64/incr.go:63.11,64.32 1 1 github.com/NDari/vecf64/incr.go:70.36,75.22 4 0 github.com/NDari/vecf64/incr.go:75.22,77.3 1 0 github.com/NDari/vecf64/incr.go:82.56,85.22 3 1 github.com/NDari/vecf64/incr.go:85.22,87.3 1 1 github.com/NDari/vecf64/incr.go:92.59,94.2 1 1 github.com/NDari/vecf64/incr.go:98.60,101.22 3 1 github.com/NDari/vecf64/incr.go:101.22,103.3 1 1 github.com/NDari/vecf64/incr.go:108.56,111.22 3 1 github.com/NDari/vecf64/incr.go:111.22,113.3 1 1 github.com/NDari/vecf64/incr.go:118.59,120.2 1 1 github.com/NDari/vecf64/incr.go:124.60,127.22 3 1 github.com/NDari/vecf64/incr.go:127.22,129.3 1 1 github.com/NDari/vecf64/incr.go:134.56,137.22 3 1 github.com/NDari/vecf64/incr.go:137.22,139.3 1 1 github.com/NDari/vecf64/incr.go:144.57,147.22 3 1 github.com/NDari/vecf64/incr.go:147.22,149.3 1 1 github.com/NDari/vecf64/utils.go:4.38,7.17 3 1 github.com/NDari/vecf64/utils.go:12.2,12.14 1 1 github.com/NDari/vecf64/utils.go:16.2,17.47 2 1 github.com/NDari/vecf64/utils.go:26.2,26.10 1 1 github.com/NDari/vecf64/utils.go:7.17,10.3 2 0 github.com/NDari/vecf64/utils.go:12.14,13.64 1 0 github.com/NDari/vecf64/utils.go:17.47,20.11 2 1 github.com/NDari/vecf64/utils.go:20.11,22.4 1 1 github.com/NDari/vecf64/utils.go:22.4,24.4 1 0 github.com/NDari/vecf64/utils.go:30.87,32.17 2 1 github.com/NDari/vecf64/utils.go:36.2,36.22 1 1 github.com/NDari/vecf64/utils.go:39.2,39.8 1 1 github.com/NDari/vecf64/utils.go:32.17,34.3 1 0 github.com/NDari/vecf64/utils.go:36.22,38.3 1 1 
vecf64-0.9.0/test.cover.sse000066400000000000000000000121231353362416300154320ustar00rootroot00000000000000mode: set github.com/NDari/vecf64/arith.go:7.26,10.22 3 1 github.com/NDari/vecf64/arith.go:10.22,11.15 1 1 github.com/NDari/vecf64/arith.go:12.10,13.21 1 1 github.com/NDari/vecf64/arith.go:14.10,15.12 1 1 github.com/NDari/vecf64/arith.go:16.10,17.16 1 1 github.com/NDari/vecf64/arith.go:18.10,19.20 1 1 github.com/NDari/vecf64/arith.go:20.11,21.28 1 1 github.com/NDari/vecf64/arith.go:26.26,29.22 3 0 github.com/NDari/vecf64/arith.go:29.22,31.3 1 0 github.com/NDari/vecf64/arith.go:36.36,37.22 1 1 github.com/NDari/vecf64/arith.go:37.22,39.3 1 1 github.com/NDari/vecf64/arith.go:44.39,46.2 1 1 github.com/NDari/vecf64/arith.go:50.40,51.22 1 1 github.com/NDari/vecf64/arith.go:51.22,53.3 1 1 github.com/NDari/vecf64/arith.go:58.36,59.22 1 1 github.com/NDari/vecf64/arith.go:59.22,61.3 1 1 github.com/NDari/vecf64/arith.go:66.39,68.2 1 1 github.com/NDari/vecf64/arith.go:72.40,73.22 1 1 github.com/NDari/vecf64/arith.go:73.22,75.3 1 1 github.com/NDari/vecf64/arith.go:80.36,81.22 1 1 github.com/NDari/vecf64/arith.go:81.22,83.3 1 1 github.com/NDari/vecf64/arith.go:88.37,89.22 1 1 github.com/NDari/vecf64/arith.go:89.22,91.3 1 1 github.com/NDari/vecf64/arith.go:95.26,99.22 3 1 github.com/NDari/vecf64/arith.go:99.22,101.13 2 1 github.com/NDari/vecf64/arith.go:101.13,103.4 1 1 github.com/NDari/vecf64/arith.go:108.26,112.22 3 1 github.com/NDari/vecf64/arith.go:112.22,114.13 2 1 github.com/NDari/vecf64/arith.go:114.13,116.4 1 1 github.com/NDari/vecf64/arith.go:123.31,125.2 1 1 github.com/NDari/vecf64/arith.go:128.42,129.16 1 1 github.com/NDari/vecf64/arith.go:132.2,132.36 1 1 github.com/NDari/vecf64/arith.go:129.16,130.49 1 1 github.com/NDari/vecf64/arith.go:136.42,137.16 1 1 github.com/NDari/vecf64/arith.go:140.2,140.36 1 1 github.com/NDari/vecf64/arith.go:137.16,138.49 1 1 github.com/NDari/vecf64/arith.go:144.30,148.22 4 1 github.com/NDari/vecf64/arith.go:169.2,169.12 1 1 
github.com/NDari/vecf64/arith.go:148.22,149.11 1 1 github.com/NDari/vecf64/arith.go:158.3,158.40 1 1 github.com/NDari/vecf64/arith.go:164.3,164.12 1 1 github.com/NDari/vecf64/arith.go:149.11,154.12 4 1 github.com/NDari/vecf64/arith.go:158.40,161.9 3 1 github.com/NDari/vecf64/arith.go:164.12,167.4 2 1 github.com/NDari/vecf64/arith.go:173.30,177.22 4 1 github.com/NDari/vecf64/arith.go:198.2,198.12 1 1 github.com/NDari/vecf64/arith.go:177.22,178.11 1 1 github.com/NDari/vecf64/arith.go:187.3,187.41 1 1 github.com/NDari/vecf64/arith.go:193.3,193.12 1 1 github.com/NDari/vecf64/arith.go:178.11,183.12 4 1 github.com/NDari/vecf64/arith.go:187.41,190.9 3 1 github.com/NDari/vecf64/arith.go:193.12,196.4 2 1 github.com/NDari/vecf64/arith.go:204.35,204.51 1 1 github.com/NDari/vecf64/arith.go:210.35,211.12 1 1 github.com/NDari/vecf64/arith.go:214.3,214.11 1 1 github.com/NDari/vecf64/arith.go:211.12,213.4 1 1 github.com/NDari/vecf64/arith.go:217.35,218.12 1 1 github.com/NDari/vecf64/arith.go:221.3,221.11 1 1 github.com/NDari/vecf64/arith.go:218.12,220.4 1 1 github.com/NDari/vecf64/incr.go:6.36,10.22 4 1 github.com/NDari/vecf64/incr.go:10.22,12.3 1 1 github.com/NDari/vecf64/incr.go:16.36,20.22 4 1 github.com/NDari/vecf64/incr.go:20.22,22.3 1 1 github.com/NDari/vecf64/incr.go:26.36,30.22 4 1 github.com/NDari/vecf64/incr.go:30.22,32.3 1 1 github.com/NDari/vecf64/incr.go:35.36,39.22 4 1 github.com/NDari/vecf64/incr.go:39.22,40.16 1 1 github.com/NDari/vecf64/incr.go:44.3,44.22 1 1 github.com/NDari/vecf64/incr.go:40.16,42.12 2 1 github.com/NDari/vecf64/incr.go:49.36,53.22 4 1 github.com/NDari/vecf64/incr.go:53.22,54.15 1 1 github.com/NDari/vecf64/incr.go:55.10,56.13 1 1 github.com/NDari/vecf64/incr.go:57.10,58.16 1 1 github.com/NDari/vecf64/incr.go:59.10,60.20 1 1 github.com/NDari/vecf64/incr.go:61.10,62.24 1 1 github.com/NDari/vecf64/incr.go:63.11,64.32 1 1 github.com/NDari/vecf64/incr.go:70.36,75.22 4 0 github.com/NDari/vecf64/incr.go:75.22,77.3 1 0 
github.com/NDari/vecf64/incr.go:82.56,85.22 3 1 github.com/NDari/vecf64/incr.go:85.22,87.3 1 1 github.com/NDari/vecf64/incr.go:92.59,94.2 1 1 github.com/NDari/vecf64/incr.go:98.60,101.22 3 1 github.com/NDari/vecf64/incr.go:101.22,103.3 1 1 github.com/NDari/vecf64/incr.go:108.56,111.22 3 1 github.com/NDari/vecf64/incr.go:111.22,113.3 1 1 github.com/NDari/vecf64/incr.go:118.59,120.2 1 1 github.com/NDari/vecf64/incr.go:124.60,127.22 3 1 github.com/NDari/vecf64/incr.go:127.22,129.3 1 1 github.com/NDari/vecf64/incr.go:134.56,137.22 3 1 github.com/NDari/vecf64/incr.go:137.22,139.3 1 1 github.com/NDari/vecf64/incr.go:144.57,147.22 3 1 github.com/NDari/vecf64/incr.go:147.22,149.3 1 1 github.com/NDari/vecf64/utils.go:4.38,7.17 3 1 github.com/NDari/vecf64/utils.go:12.2,12.14 1 1 github.com/NDari/vecf64/utils.go:16.2,17.47 2 1 github.com/NDari/vecf64/utils.go:26.2,26.10 1 1 github.com/NDari/vecf64/utils.go:7.17,10.3 2 0 github.com/NDari/vecf64/utils.go:12.14,13.64 1 0 github.com/NDari/vecf64/utils.go:17.47,20.11 2 1 github.com/NDari/vecf64/utils.go:20.11,22.4 1 1 github.com/NDari/vecf64/utils.go:22.4,24.4 1 0 github.com/NDari/vecf64/utils.go:30.87,32.17 2 1 github.com/NDari/vecf64/utils.go:36.2,36.22 1 1 github.com/NDari/vecf64/utils.go:39.2,39.8 1 1 github.com/NDari/vecf64/utils.go:32.17,34.3 1 0 github.com/NDari/vecf64/utils.go:36.22,38.3 1 1 vecf64-0.9.0/test.sh000077500000000000000000000013711353362416300141430ustar00rootroot00000000000000set -ex go env go test -v -a -coverprofile=test.cover go test -tags='sse' -v -a -coverprofile=test.cover.sse go test -tags='avx' -v -a -coverprofile=test.cover.avx echo "mode: set" > final.cover tail -q -n +2 test.cover test.cover.sse test.cover.avx >> ./final.cover goveralls -coverprofile=./final.cover -service=travis-ci # travis compiles commands in script and then executes in bash. By adding # set -e we are changing the travis build script's behavior, and the set # -e lives on past the commands we are providing it. 
Some of the travis # commands are supposed to exit with non zero status, but then continue # executing. set -x makes the travis log files extremely verbose and # difficult to understand. # # see travis-ci/travis-ci#5120 set +ex vecf64-0.9.0/utils.go000066400000000000000000000013211353362416300143070ustar00rootroot00000000000000package vecf64 // Range is a function to create arithmetic progressions of float32 func Range(start, end int) []float64 { size := end - start incr := true if start > end { incr = false size = start - end } if size < 0 { panic("Cannot create a float range that is negative in size") } r := make([]float64, size) for i, v := 0, float64(start); i < size; i++ { r[i] = v if incr { v++ } else { v-- } } return r } // Reduce takes a function to reduce by, a defalut, and a splatted list of float64s func Reduce(f func(a, b float64) float64, def float64, l ...float64) (retVal float64) { retVal = def if len(l) == 0 { return } for _, v := range l { retVal = f(retVal, v) } return }