pax_global_header00006660000000000000000000000064141422164340014513gustar00rootroot0000000000000052 comment=cf2c8c1123e54ed6cfc219cb7b57d46955e78841 kmers-0.1.0/000077500000000000000000000000001414221643400126325ustar00rootroot00000000000000kmers-0.1.0/.directory000066400000000000000000000003671414221643400146450ustar00rootroot00000000000000[Dolphin] HeaderColumnWidths=576,146,186,72 PreviewsShown=false Timestamp=2021,11,8,20,47,18.925 Version=4 ViewMode=1 VisibleRoles=Details_text,Details_type,Details_modificationtime,Details_size,CustomizedDetails [Settings] HiddenFilesShown=true kmers-0.1.0/LICENSE000077500000000000000000000021131414221643400136370ustar00rootroot00000000000000Copyright (c) 2018 - 2021 Wei Shen (shenwei356@gmail.com) The MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. kmers-0.1.0/README.md000077500000000000000000000041771414221643400141250ustar00rootroot00000000000000# kmers [![Go Reference](https://pkg.go.dev/badge/github.com/shenwei356/kmers.svg)](https://pkg.go.dev/github.com/shenwei356/kmers) This package provides manipulations for bit-packed k-mers (k<=32, encoded in `uint64`). Related projects: - [unik](https://github.com/shenwei356/unik) provides k-mer serialization methods for this package. - [unikmer](https://github.com/shenwei356/unikmer), a toolkit for nucleic acid k-mer analysis, including set operations on k-mers optional with TaxIDs. - [sketches](https://pkg.go.dev/github.com/shenwei356/bio/sketches) provides generators/iterators for k-mer sketches ([Minimizer](https://academic.oup.com/bioinformatics/article/20/18/3363/202143), [Scaled MinHash](https://f1000research.com/articles/8-1006), [Closed Syncmers](https://peerj.com/articles/10805/)). ## Benchmark CPU: AMD Ryzen 7 2700X Eight-Core Processor, 3.7 GHz $ go test . -bench=Bench* -benchmem \ | grep Bench \ | perl -pe 's/\s\s+/\t/g' \ | csvtk cut -Ht -f 1,3-5 \ | csvtk add-header -t -n test,time,memory,allocs \ | csvtk pretty -t -r test time memory allocs ------------------------------------------ ------------ -------- ----------- BenchmarkEncodeK32-16 19.67 ns/op 0 B/op 0 allocs/op BenchmarkEncodeFromFormerKmerK32-16 7.692 ns/op 0 B/op 0 allocs/op BenchmarkMustEncodeFromFormerKmerK32-16 2.008 ns/op 0 B/op 0 allocs/op BenchmarkDecodeK32-16 80.73 ns/op 32 B/op 1 allocs/op BenchmarkMustDecodeK32-16 76.93 ns/op 32 B/op 1 allocs/op BenchmarkRevK32-16 3.617 ns/op 0 B/op 0 allocs/op BenchmarkCompK32-16 0.7999 ns/op 0 B/op 0 allocs/op BenchmarkRevCompK32-16 3.814 ns/op 0 B/op 0 allocs/op BenchmarkCannonalK32-16 4.147 ns/op 0 B/op 0 allocs/op ## History This package was originally maintained in [unikmer](https://github.com/shenwei356/unikmer). kmers-0.1.0/go.mod000066400000000000000000000000541414221643400137370ustar00rootroot00000000000000module github.com/shenwei356/kmers go 1.17 kmers-0.1.0/kmer-sort.go000066400000000000000000000042051414221643400151050ustar00rootroot00000000000000// Copyright © 2018-2021 Wei Shen // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. package kmers // KmerCodeSlice is a slice of KmerCode, for sorting type KmerCodeSlice []KmerCode // Len return length of the slice func (codes KmerCodeSlice) Len() int { return len(codes) } // Swap swaps two elements func (codes KmerCodeSlice) Swap(i, j int) { codes[i], codes[j] = codes[j], codes[i] } // Less simply compare two KmerCode func (codes KmerCodeSlice) Less(i, j int) bool { return codes[i].Code < codes[j].Code } // func splitKmer(code uint64, k int) (uint64, uint64, uint64, uint64) { // // -====, k = 4: ---, -, =, === // return code >> 2, code & 3, code >> (uint(k-1) << 1) & 3, code & ((1 << (uint(k-1) << 1)) - 1) // } // CodeSlice is a slice of Kmer code (uint64), for sorting type CodeSlice []uint64 // Len return length of the slice func (codes CodeSlice) Len() int { return len(codes) } // Swap swaps two elements func (codes CodeSlice) Swap(i, j int) { codes[i], codes[j] = codes[j], codes[i] } // Less simply compare two KmerCode func (codes CodeSlice) Less(i, j int) bool { return codes[i] < codes[j] } kmers-0.1.0/kmer.go000066400000000000000000000304441414221643400141240ustar00rootroot00000000000000// Copyright © 2018-2021 Wei Shen // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. //b // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. package kmers import ( "bytes" "errors" ) // ErrIllegalBase means that base beyond IUPAC symbols are detected. var ErrIllegalBase = errors.New("kmers: illegal base") // ErrKOverflow means K > 32. var ErrKOverflow = errors.New("kmers: k-mer size (1-32) overflow") // ErrCodeOverflow means the encode interger is bigger than 4^k. var ErrCodeOverflow = errors.New("kmers: code value overflow") // ErrKMismatch means K size mismatch. var ErrKMismatch = errors.New("kmers: K mismatch") // slice is much faster than switch and map. var base2bit = [256]uint64{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 1, 1, 0, 4, 4, 2, 0, 4, 4, 2, 4, 0, 0, 4, 4, 4, 0, 1, 3, 3, 0, 0, 4, 1, 4, 4, 4, 4, 4, 4, 4, 0, 1, 1, 0, 4, 4, 2, 0, 4, 4, 2, 4, 0, 0, 4, 4, 4, 0, 1, 3, 3, 0, 0, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, } // var base2bit []uint64 // MaxCode is the maxinum interger for all Ks. var MaxCode []uint64 func init() { MaxCode = make([]uint64, 33) for i := 1; i <= 32; i++ { MaxCode[i] = 1< 32 { return 0, ErrKOverflow } var v uint64 for _, b := range kmer { code <<= 2 v = base2bit[b] // if v > 3 { if v == 4 { return code, ErrIllegalBase } code |= v } return code, nil } // ErrNotConsecutiveKmers means the two k-mers are not consecutive. var ErrNotConsecutiveKmers = errors.New("kmers: not consecutive k-mers") // MustEncodeFromFormerKmer encodes from former the k-mer, // assuming the k-mer and leftKmer are both OK. func MustEncodeFromFormerKmer(kmer []byte, leftKmer []byte, leftCode uint64) (uint64, error) { v := base2bit[kmer[len(kmer)-1]] // if v > 3 { if v == 4 { return leftCode, ErrIllegalBase } // retrieve (k-1)*2 bits and << 2, and then add v return leftCode&((1<<(uint(len(kmer)-1)<<1))-1)<<2 | v, nil } // EncodeFromFormerKmer encodes from the former k-mer, inspired by ntHash func EncodeFromFormerKmer(kmer []byte, leftKmer []byte, leftCode uint64) (uint64, error) { if len(kmer) == 0 { return 0, ErrKOverflow } if len(kmer) != len(leftKmer) { return 0, ErrKMismatch } if !bytes.Equal(kmer[0:len(kmer)-1], leftKmer[1:]) { return 0, ErrNotConsecutiveKmers } return MustEncodeFromFormerKmer(kmer, leftKmer, leftCode) } // MustEncodeFromLatterKmer encodes from the latter k-mer, // assuming the k-mer and rightKmer are both OK. func MustEncodeFromLatterKmer(kmer []byte, rightKmer []byte, rightCode uint64) (uint64, error) { v := base2bit[kmer[0]] // if v > 3 { if v == 4 { return rightCode, ErrIllegalBase } return v<<(uint(len(kmer)-1)<<1) | rightCode>>2, nil } // EncodeFromLatterKmer encodes from the former k-mer. func EncodeFromLatterKmer(kmer []byte, rightKmer []byte, rightCode uint64) (uint64, error) { if len(kmer) == 0 { return 0, ErrKOverflow } if len(kmer) != len(rightKmer) { return 0, ErrKMismatch } if !bytes.Equal(rightKmer[0:len(kmer)-1], kmer[1:len(rightKmer)]) { return 0, ErrNotConsecutiveKmers } return MustEncodeFromLatterKmer(kmer, rightKmer, rightCode) } // Reverse returns code of the reversed sequence. func Reverse(code uint64, k int) (c uint64) { if k <= 0 || k > 32 { panic(ErrKOverflow) } // for i := 0; i < k; i++ { // c = (c << 2) | (code & 3) // code >>= 2 // } // return // https: //www.biostars.org/p/113640, with a little modification c = code c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2) c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4) c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8) c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16) c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32) return (c >> (2 * (32 - k))) } // MustReverse is similar to Reverse, but does not check k. func MustReverse(code uint64, k int) (c uint64) { // for i := 0; i < k; i++ { // c = (c << 2) | (code & 3) // code >>= 2 // } // return // https: //www.biostars.org/p/113640, with a little modification c = code c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2) c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4) c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8) c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16) c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32) return (c >> (2 * (32 - k))) } // Complement returns code of complement sequence. func Complement(code uint64, k int) uint64 { if k <= 0 || k > 32 { panic(ErrKOverflow) } return code ^ (1< 32 { panic(ErrKOverflow) } // for i := 0; i < k; i++ { // c = (c << 2) | (code&3 ^ 3) // code >>= 2 // } // return // https://www.biostars.org/p/113640/#9474334 c = ^code c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2) c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4) c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8) c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16) c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32) return (c >> (2 * (32 - k))) } // MustRevComp is similar to RevComp, but does not check k. func MustRevComp(code uint64, k int) (c uint64) { // for i := 0; i < k; i++ { // c = (c << 2) | (code&3 ^ 3) // code >>= 2 // } // return // https://www.biostars.org/p/113640/#9474334 c = ^code c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2) c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4) c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8) c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16) c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32) return (c >> (2 * (32 - k))) } // Canonical returns code of its canonical kmer. func Canonical(code uint64, k int) uint64 { if k <= 0 || k > 32 { panic(ErrKOverflow) } var rc uint64 // c := code // for i := 0; i < k; i++ { // rc = (rc << 2) | (c&3 ^ 3) // c >>= 2 // } // https://www.biostars.org/p/113640/#9474334 c := ^code c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2) c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4) c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8) c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16) c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32) rc = (c >> (2 * (32 - k))) if rc < code { return rc } return code } // MustCanonical is similar to Canonical, but does not check k. func MustCanonical(code uint64, k int) uint64 { var rc uint64 // c := code // for i := 0; i < k; i++ { // rc = (rc << 2) | (c&3 ^ 3) // c >>= 2 // } // https://www.biostars.org/p/113640/#9474334 c := ^code c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2) c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4) c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8) c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16) c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32) rc = (c >> (2 * (32 - k))) if rc < code { return rc } return code } // bit2base is for mapping bit to base. var bit2base = [4]byte{'A', 'C', 'G', 'T'} // bit2str is for output bits string var bit2str = [4]string{"00", "01", "10", "11"} // Decode converts the code to original seq func Decode(code uint64, k int) []byte { if k <= 0 || k > 32 { panic(ErrKOverflow) } if code > MaxCode[k] { panic(ErrCodeOverflow) } kmer := make([]byte, k) for i := 0; i < k; i++ { kmer[k-1-i] = bit2base[code&3] code >>= 2 } return kmer } // MustDecode is similar to Decode, but does not check k and code. func MustDecode(code uint64, k int) []byte { kmer := make([]byte, k) for i := 0; i < k; i++ { kmer[k-1-i] = bit2base[code&3] code >>= 2 } return kmer } // KmerCode is a struct representing a k-mer in 64-bits. type KmerCode struct { Code uint64 K int } // NewKmerCode returns a new KmerCode struct from byte slice. func NewKmerCode(kmer []byte) (KmerCode, error) { code, err := Encode(kmer) if err != nil { return KmerCode{}, err } return KmerCode{code, len(kmer)}, err } // NewKmerCodeFromFormerOne computes KmerCode from the Former consecutive k-mer. func NewKmerCodeFromFormerOne(kmer []byte, leftKmer []byte, preKcode KmerCode) (KmerCode, error) { code, err := EncodeFromFormerKmer(kmer, leftKmer, preKcode.Code) if err != nil { return KmerCode{}, err } return KmerCode{code, len(kmer)}, err } // NewKmerCodeMustFromFormerOne computes KmerCode from the Former consecutive k-mer, // assuming the k-mer and leftKmer are both OK. func NewKmerCodeMustFromFormerOne(kmer []byte, leftKmer []byte, preKcode KmerCode) (KmerCode, error) { code, err := MustEncodeFromFormerKmer(kmer, leftKmer, preKcode.Code) if err != nil { return KmerCode{}, err } return KmerCode{code, len(kmer)}, err } // Equal checks wether two KmerCodes are the same. func (kcode KmerCode) Equal(kcode2 KmerCode) bool { return kcode.K == kcode2.K && kcode.Code == kcode2.Code } // Rev returns KmerCode of the reverse sequence. func (kcode KmerCode) Rev() KmerCode { return KmerCode{MustReverse(kcode.Code, kcode.K), kcode.K} } // Comp returns KmerCode of the complement sequence. func (kcode KmerCode) Comp() KmerCode { return KmerCode{MustComplement(kcode.Code, kcode.K), kcode.K} } // RevComp returns KmerCode of the reverse complement sequence. func (kcode KmerCode) RevComp() KmerCode { return KmerCode{MustRevComp(kcode.Code, kcode.K), kcode.K} } // Canonical returns its canonical kmer func (kcode KmerCode) Canonical() KmerCode { rcKcode := kcode.RevComp() if rcKcode.Code < kcode.Code { return rcKcode } return kcode } // Bytes returns k-mer in []byte. func (kcode KmerCode) Bytes() []byte { return Decode(kcode.Code, kcode.K) } // String returns k-mer in string func (kcode KmerCode) String() string { return string(Decode(kcode.Code, kcode.K)) } // BitsString returns code to string func (kcode KmerCode) BitsString() string { var buf bytes.Buffer for _, b := range Decode(kcode.Code, kcode.K) { buf.WriteString(bit2str[base2bit[b]]) } return buf.String() } kmers-0.1.0/kmer_test.go000066400000000000000000000151631414221643400151640ustar00rootroot00000000000000// Copyright © 2018-2021 Wei Shen // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. package kmers import ( "bytes" "fmt" "math/rand" "testing" ) var randomMers [][]byte var randomMersN = 100000 var benchMer = []byte("ACTGactgGTCAgtcaactgGTCAACTGGTCA") var codeBenchMer uint64 = 2170370756141391540 var benchMer2 = []byte("CTGactgGTCAgtcaactgGTCAACTGGTCAC") var codeBenchMer2 uint64 = 8681483024565566161 var benchCode uint64 var benchKmerCode KmerCode func init() { randomMers = make([][]byte, randomMersN) for i := 0; i < randomMersN; i++ { randomMers[i] = make([]byte, rand.Intn(32)+1) for j := range randomMers[i] { randomMers[i][j] = bit2base[rand.Intn(4)] } } // for benchmark var err error benchCode, err = Encode(benchMer) if err != nil { panic(fmt.Sprintf("init: fail to encode %s", benchMer)) } benchKmerCode, err = NewKmerCode(benchMer) if err != nil { panic(fmt.Sprintf("init: fail to create KmerCode from %s", benchMer)) } } // TestEncodeDecode tests encode and decode func TestEncodeDecode(t *testing.T) { var kcode KmerCode var err error for _, mer := range randomMers { kcode, err = NewKmerCode(mer) // encode if err != nil { t.Errorf("Encode error: %s", mer) } if !bytes.Equal(mer, kcode.Bytes()) { // decode t.Errorf("Decode error: %s != %s ", mer, kcode.Bytes()) } } } // TestEncodeFromFormerKmer tests TestEncodeFromFormerKmer func TestEncodeFromFormerKmer(t *testing.T) { var err error k := 5 first := true var code, code0, pCode uint64 var kmer, pKmer []byte for i := 0; i < len(benchMer)-k; i++ { kmer = benchMer[i : i+k] if first { code, err = Encode(kmer) if err != nil { t.Errorf("Encode error: %s", kmer) } pCode = code first = false continue } pKmer = benchMer[i-1 : i+k-1] code, err = EncodeFromFormerKmer(kmer, pKmer, pCode) if err != nil { t.Errorf("Encode error: %s", kmer) } code0, err = Encode(kmer) if err != nil { t.Errorf("Encode error: %s", kmer) } if code0 != code { t.Errorf("EncodeFromFormerKmer error for %s: wrong %d != right %d", kmer, code, code0) } pCode = code } } func TestEncodeFromLatterKmer(t *testing.T) { var err error k := 5 first := true var code, code0, pCode uint64 var kmer, pKmer []byte for i := len(benchMer) - k - 1; i >= 0; i-- { kmer = benchMer[i : i+k] if first { code, err = Encode(kmer) if err != nil { t.Errorf("Encode error: %s", kmer) } pCode = code first = false continue } pKmer = benchMer[i+1 : i+k+1] code, err = EncodeFromLatterKmer(kmer, pKmer, pCode) if err != nil { t.Errorf("Encode error: %s", kmer) } code0, err = Encode(kmer) if err != nil { t.Errorf("Encode error: %s", kmer) } if code0 != code { t.Errorf("EncodeFromLatterKmer error for %s: wrong %d != right %d", kmer, code, code0) } pCode = code } } // TestRevComp tests revcomp func TestRevComp(t *testing.T) { var kcode KmerCode for _, mer := range randomMers { kcode, _ = NewKmerCode(mer) // fmt.Printf("%s, rev:%s\n", kcode, kcode.Rev()) } for _, mer := range randomMers { kcode, _ = NewKmerCode(mer) if !kcode.Rev().Rev().Equal(kcode) { t.Errorf("Rev() error: %s, Rev(): %s", kcode, kcode.Rev()) } if !kcode.Comp().Comp().Equal(kcode) { t.Errorf("Comp() error: %s, Comp(): %s", kcode, kcode.Comp()) } if !kcode.Comp().Rev().Equal(kcode.RevComp()) { t.Errorf("Rev().Comp() error: %s, Rev(): %s, Comp(): %s, RevComp: %s", kcode, kcode.Rev(), kcode.Comp(), kcode.RevComp()) } } } var result uint64 // BenchmarkEncode tests speed of Encode() func BenchmarkEncodeK32(b *testing.B) { var code uint64 var err error for i := 0; i < b.N; i++ { code, err = Encode(benchMer) if err != nil { b.Errorf("Encode error: %s", benchMer) } if code != codeBenchMer { b.Errorf("wrong result: %s", benchMer) } } result = code } // BenchmarkEncode tests speed of EncodeFromFormerKmer func BenchmarkEncodeFromFormerKmerK32(b *testing.B) { var code uint64 var err error for i := 0; i < b.N; i++ { code, err = EncodeFromFormerKmer(benchMer2, benchMer, benchCode) if err != nil { b.Errorf("Encode error: %s", benchMer) } if code != codeBenchMer2 { b.Errorf("wrong result: %s", benchMer) } } result = code } // BenchmarkEncode tests speed of MustEncodeFromFormerKmer func BenchmarkMustEncodeFromFormerKmerK32(b *testing.B) { var code uint64 var err error for i := 0; i < b.N; i++ { code, err = MustEncodeFromFormerKmer(benchMer2, benchMer, benchCode) if err != nil { b.Errorf("Encode error: %s", benchMer) } if code != codeBenchMer2 { b.Errorf("wrong result: %s", benchMer) } } result = code } var result2 []byte // BenchmarkDecode tests speed of decode func BenchmarkDecodeK32(b *testing.B) { var r []byte for i := 0; i < b.N; i++ { r = Decode(benchCode, len(benchMer)) } result2 = r } func BenchmarkMustDecodeK32(b *testing.B) { var r []byte for i := 0; i < b.N; i++ { r = MustDecode(benchCode, len(benchMer)) } result2 = r } var result3 KmerCode // BenchmarkRevK32 tests speed of rev func BenchmarkRevK32(b *testing.B) { var r KmerCode for i := 0; i < b.N; i++ { r = benchKmerCode.Rev() } result3 = r } // BenchmarkRevK32 tests speed of comp func BenchmarkCompK32(b *testing.B) { var r KmerCode for i := 0; i < b.N; i++ { r = benchKmerCode.Comp() } result3 = r } // BenchmarkRevCompK32 tests speed of revcomp func BenchmarkRevCompK32(b *testing.B) { var r KmerCode for i := 0; i < b.N; i++ { r = benchKmerCode.RevComp() } result3 = r } func BenchmarkCannonalK32(b *testing.B) { var r KmerCode for i := 0; i < b.N; i++ { r = benchKmerCode.Canonical() } result3 = r }