pax_global_header00006660000000000000000000000064124226674110014517gustar00rootroot0000000000000052 comment=6a7f27d9861c3b81cb9084a909e24da8091d1d19 node-keese-1.1.1/000077500000000000000000000000001242266741100135365ustar00rootroot00000000000000node-keese-1.1.1/LICENSE000066400000000000000000000020651242266741100145460ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2014 Josh Wolfe Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. node-keese-1.1.1/README.md000066400000000000000000000304051242266741100150170ustar00rootroot00000000000000node-keese ========== Generates arbitrary-precision, comparable values, appropriate for use as sorting keys. keese can always generate a bigger value, a smaller value, and a value between two other values. This is trivial using numbers with `x+1`, `x-1`, and `(x+y)/2` respectively. However, numbers have limited precision in JavaScript (see below), so instead keese uses strings. 
The string values are comparable with the builtin comparison operators (such as `<`), and keese can *always* generate a new value that satisfies the constraints (limited only by system resources). ```js var keese = require('keese'); var something = keese(); var bigger = keese(something, null); var smaller = keese(null, something); // smaller < something < bigger var medium = keese(smaller, bigger); // smaller < medium < bigger // but no guarantee about medium vs something var values = keese(smaller, bigger, 10); // values is an array of 10 ascending items between smaller and bigger ``` Formally: ```js var middle = keese(low, high, count); ``` Where: * `low` and `high` must each either be `== null` or be values from a previous call to `keese`. * `count` must be either `== null` or a non-negative integer (type `Number`). * If `count == null`: * If `low != null`, then `low < middle`. * If `high != null`, then `middle < high`. * If `count != null`, `middle` is an Array of `count` values, and if `count > 0`: * If `low != null`, then `low < middle[0]`. * If `high != null`, then `middle[middle.length - 1] < high`. * The values in `middle` are in ascending order (i.e. `middle.sort()` will have no effect). `keese` is a [pure function](http://en.wikipedia.org/wiki/Pure_function). Why would I want this? ---------------------- Say you have a client-server architecture where clients can edit an ordered list of items. Clients can insert, delete, and move items around in the list. ([Groove Basin](https://github.com/superjoe30/groovebasin) does this with the current playlist of songs.) The naive approach would be to use an Array, and communicate about where an operation is happening by using an index into the array. For example, "add item x at index 5", or "delete item at index 2" or "move item at index 7 to index 10", etc. This works well enough, but race conditions can cause sad behavior. 
Imagine all three of the above commands are sent to the server at once from different clients. Say the server receives the "delete" command first, and shifts all the items above index 2 down 1. Now the "move" command referencing the item at index 7 is actually talking about a different item, one that was originally at index 8. The "add" command is similarly misinterpreted, because the client may have specifically wanted "item x" to be inserted immediately after a particular item. Another naive approach is to communicate about locations relative to existing items. For example, "add item x just after item y", or "delete item y". Do you see the problem here? Say a client wants to insert "item x" in the middle of a range of 10 items, but another client deletes those 10 items before the insert request can be processed. There's no guarantee that the item(s) a client refers to will still exist on the server by the time the request is processed. The most robust solution is to communicate about locations using arbitrary sorting keys. Give every item a different value such that when the items are sorted using the values, they are ordered appropriately. For example, start out by giving each of 10 items in the list the values 1 through 10 as their sorting keys. Now if a client deletes the item with the sorting key of 2, there's no need to shift anything; just leave the other sorting keys where they are. When a client wants to insert an item between 5 and 6, give the new item a sorting key of 5.5. When a client wants to move an item, change the item's sorting key to some other value. After performing each of these operations, simply sort the list again, and the items will be in the desired order. By using sorting keys, the opportunity for race conditions is almost entirely eliminated. 
There can still be race conditions when there is truly no possible automatic solution, such as two clients inserting different items into the same location, or two clients both trying to delete the same item at once. However, these problems usually have trivial solutions, and they are outside the scope of this project. keese exists for the purpose of generating sorting keys that never run out of precision. Numbers have limited precision ------------------------------ This code snippet shows how many times you can obtain a middle value using a JavaScript `Number` and still get a meaningful result. ```js var a = 1; var b = 2; var count = 0; while (a !== b) { count += 1; a = (a + b) / 2.0; } console.log(count); ``` Output: 53 Comparing that to keese, we generate a middle value 53 times and then check the result. ```js var keese = require('keese'); var low = keese(); var high = keese(low, null); for (var i = 0; i < 53; i += 1) { high = keese(low, high); } console.log(high); ``` Output: "1000000002" This takes up a few more bytes than a `Number`. However, unlike what happens when you generate a middle value 53 times with a `Number`, we actually have a usable value to compare. How it works ------------ The problem with JavaScript `Number`s is that they have limited precision. In order to get arbitrary numeric precision, we need more than a single primitive number value; we need an arbitrarily-large array of numbers. We could use JavaScript `Array`s, but `String`s are better for the following reasons: strings are more compact in JSON (and probably in memory), and strings can be compared easily with a builtin function (the `<` operator), which is convenient (and probably much more efficient than writing a custom Array comparator). Being able to compare strings using `<` (called lexicographical ordering) is a driving principle in the design of this library. So how do we encode arbitrary precision numbers in strings in such a way as to preserve lexicographical ordering? 
Base 10 is a good place to start. What comes between `"0.1"` and `"0.2"`? The numeric answer `"0.15"` satisfies the lexicographical ordering; adding digits to the end of the smaller string is a good way to implement going in between two values. The problem with a naive base 10 encoding is that adding digits to the left breaks lexicographical ordering. `9 < 10` but `"9" > "10"`. The problem is that the "ones" digit `"9"` is being compared to the "tens" digit `"1"` in `"10"`. The common way to solve this frame-shift error is to pad any small numbers with `"0"`s. `"09" < "10"`. This is obviously problematic, because we are forced to limit the number of digits with this strategy, thereby failing to have arbitrary precision. The solution is to reserve a special digit `"~"` that has a larger character value than any other character in the alphabet, and use this character to make sure we never get frame shift errors. `"9" < "~10"`. `"~99" < "~~100"`. If two values have different orders of magnitude, then a `"~"` will inevitably be compared against a character that is not a `"~"`, and no actual digit values will ever be compared. (While this doubles the number of digits for large integers, remember that the number of digits is still O(n).) With this lexicographically-correct magnitude specification, we have no need for any decimal points, which in common notation accomplish the same purpose. We can write `"21"` instead of `"2.1"`, because we know that `"~12"` is bigger due to the `"~"`. One last problem remains, which is how to generate a value smaller than the parameter. We have no way of encoding negative numbers lexicographically correctly; all the digit values would be inverted, and comparisons would be all wrong. The solution to this is surprisingly trivial; keep a "smallest value" secret from the client, and implement the "smaller" function by going in between the "smallest value" and the parameter. 
We reserve the number `0`, encoded as `""`, as the smallest value, and effectively halve any parameter to generate a smaller value. (It would be more efficient to use a special character, such as `"!"`, to signify negative magnitude. Oh well. Maybe in a future version of keese.) As a matter of efficiency, using base 10 would only encode 10 values per character, but we can easily encode many more. JavaScript strings are made of 16-bit characters, so the maximum density we can achieve is radix 65536 (not counting the magnitude specifier). However, many characters in this range are unreadable and untypable, and some JSON libraries (such as in python) will escape non-ascii values with `"\u1234"` notation by default, so radix 65536 may not be worth the trouble. Following the example of the base64 encoding, keese uses radix 64 with all printable ascii characters. This is the alphabet: ``` 0123456789?@ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ``` plus `"~"` for the magnitude specification. Here are some example encoded values: * `keese()` returns "1", the number `1`. * `"z"` is the number `63`. * `keese("z", null)` returns "~10", the number `64`. * `keese("1", "2")` returns "1U", the number `1.5`. Algorithmic Complexity ---------------------- The runtime performance of calling `keese` once is proportional to the size of the inputs. Formally, the runtime of `keese(low, high)` is `O(low.length + high.length)`. The size of the return value depends on the value of the input parameters. Informally: * initializing: `keese()` is `O(1)` * increasing: `keese(low, null)` is `O(log(n))` * decreasing: `keese(null, high)` is `O(n)` (probably could be improved) * betweening: `keese(low, high)` is `O(n)` Where `n` is how many times `keese` has been called to get the input value. More formally, start with `var x = keese()`, run the below code, then analyze the size of `x` in terms of `n`. 
Increasing (`O(log(n))`): ```js for (var i = 0; i < n; i++) { x = keese(x, null); } ``` Decreasing (`O(n)` - probably could be improved): ```js for (var i = 0; i < n; i++) { x = keese(null, x); } ``` Betweening (`O(n)`): ```js var y = keese(x, null); // or any other value for (var i = 0; i < n; i++) { if (Math.random() > 0.5) { x = keese(x, y); } else { y = keese(x, y); } } ``` I believe it is provable that betweening cannot do any better than `O(n)`: * Each value returned from `keese(x, y)` could be assigned to either `x` or `y`. * The next call to `keese(x, y)` must return a value that takes into account whether `x` or `y` was chosen in the previous step. Because of this, the return value effectively encodes the decision of whether `x` or `y` was chosen. * This information is not lost on the next call to `keese(x, y)`. Therefore, a value obtained through the algorithm above must encode a complete history of each decision. * Each of the `n` decisions must occupy a minimum of 1 bit of space in the string, therefore the size of the string is `O(n)`. The Count Parameter ------------------- The naive way to generate `n` values at once would be: ```js function generateValues(low, high, count) { var result = []; for (var i = 0; i < count; i++) { var value = keese(low, high); result.push(value); low = value; } return result; } ``` This results in values with `O(count)` size (see discussion on algorithmic complexity above). A better algorithm would be to fill in an array using a binary-tree descent pattern: generate a value for the middle element of the array, and then recurse on each of the left and right remaining spaces. 
```js function generateValues(low, high, count) { var result = new Array(count); if (count > 0) recurse(low, high, 0, count); return result; function recurse(low_value, high_value, low_index, high_index) { var mid_index = Math.floor((low_index + high_index) / 2); var mid_value = single_keese(low_value, high_value); result[mid_index] = mid_value; if (low_index < mid_index) recurse(low_value, mid_value, low_index, mid_index); if (mid_index + 1 < high_index) recurse(mid_value, high_value, mid_index + 1, high_index); } } ``` This generates values with only `O(log(count))` size. This is the optimal algorithmic complexity for such a task. Since this algorithm is probably useful to many clients and a bit cumbersome to implement yourself, keese provides an implementation via the optional `count` parameter to `keese()`. node-keese-1.1.1/index.js000066400000000000000000000107311242266741100152050ustar00rootroot00000000000000 // the basic characters in sorted order var alphabet = "0123456789?@ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; var radix = alphabet.length; // bigger than all the basic characters var order_specifier = "~"; // character to numerical value aka index of the character // "0": 0, "z": 63, etc. 
// Module constants, declared here so that this block is self-contained.
// the basic characters in sorted order
var alphabet = "0123456789?@ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
var radix = alphabet.length;
// bigger than all the basic characters
var order_specifier = "~";

// character to numerical value aka index of the character
// "0": 0, "z": 63, etc.
var values = (function() {
  var table = {};
  for (var i = 0; i < alphabet.length; i++) table[alphabet[i]] = i;
  return table;
})();

// Export for CommonJS consumers. Guarded so that evaluating this code where
// `module` does not exist (e.g. as an ES module) does not throw.
if (typeof module !== "undefined" && module.exports) module.exports = keese;

// Entry point.
//   low, high: each either null/undefined or a string returned by an earlier
//              call to keese.
//   count:     null/undefined for a single key, otherwise a non-negative
//              integer Number.
// Returns one key (string) strictly between the given bounds, or an ascending
// array of `count` such keys.
// Throws TypeError if count is given but is not a Number, RangeError if it is
// not a non-negative integer, and Error if both bounds are given but no key
// exists between them.
function keese(low, high, count) {
  if (count != null) return multi_keese(low, high, count);
  return single_keese(low, high);
}

// Produce a single key for the four combinations of missing/present bounds.
function single_keese(low, high) {
  if (low == null) {
    // return anything above 0
    if (high == null) return "1";
    // go smaller: "0" is the reserved smallest value, so halve towards it
    return average("0", high);
  }
  // go bigger
  if (high == null) return increment(low);
  // go in between
  return average(low, high);
}

// Produce `count` ascending keys strictly between low and high.
function multi_keese(low, high, count) {
  // A non-Number count (e.g. the string "3") would otherwise be concatenated
  // instead of added in the index arithmetic below and scatter results over
  // wrong indexes.
  if (typeof count !== "number") {
    throw new TypeError("count must be a Number, got " + typeof count);
  }
  if (!(count >= 0) || !isFinite(count) || Math.floor(count) !== count) {
    throw new RangeError("count must be a non-negative integer, got " + count);
  }
  var result = new Array(count);
  if (count > 0) {
    if (high == null) {
      // no upper bound: just allocate straight forward by incrementing
      for (var i = 0; i < count; i++) {
        low = single_keese(low, null);
        result[i] = low;
      }
    } else {
      // binary tree descent keeps every key short
      recurse(low, high, 0, count);
    }
  }
  return result;

  // Fill result[low_index .. high_index - 1] with keys between low_value and
  // high_value: the middle slot first, then each half.
  function recurse(low_value, high_value, low_index, high_index) {
    var mid_index = Math.floor((low_index + high_index) / 2);
    var mid_value = single_keese(low_value, high_value);
    result[mid_index] = mid_value;
    if (low_index < mid_index) recurse(low_value, mid_value, low_index, mid_index);
    if (mid_index + 1 < high_index) recurse(mid_value, high_value, mid_index + 1, high_index);
  }
}

// Return the next whole number above value.
function increment(value) {
  var n = parse(value);
  // drop the fraction: keep only the order_length + 1 integer digits
  n.digits = n.digits.slice(0, n.order_length + 1);
  return add(n, parse("1"));
}

// Return a key strictly between low and high.
// Throws Error unless low < high, and when the two keys encode the same
// number so that nothing can sort between them.
function average(low, high) {
  if (!(low < high)) {
    throw new Error("assertion failed: " + JSON.stringify(low) + " < " + JSON.stringify(high));
  }
  var a = parse(low);
  var b = parse(high);
  pad_to_equal_order(a, b);
  // b_carry is `radix` once b has been found to be exactly one unit ahead of
  // a at an earlier digit position.
  var b_carry = 0;
  var max_digit_length = Math.max(a.digits.length, b.digits.length);
  for (var i = 0; i < max_digit_length || b_carry > 0; i++) {
    // digits past the end of either string count as 0
    var a_value = values[a.digits[i]] || 0;
    var b_value = b_carry + (values[b.digits[i]] || 0);
    if (a_value === b_value) continue;
    if (a_value === b_value - 1) {
      // we need more digits, but remember that b is ahead
      b_carry = radix;
      continue;
    }
    // we have a distance of at least 2 between the values.
    // half the distance floored is sure to be a positive single digit.
    var half_distance_value = Math.floor((b_value - a_value) / 2);
    var half_distance_digits = "";
    for (var j = 0; j < i; j++) half_distance_digits += "0";
    half_distance_digits += alphabet[half_distance_value];
    var half_distance = parse(construct(a.order_length, half_distance_digits));
    // truncate insignificant digits of a
    a.digits = a.digits.slice(0, i + 1);
    return add(a, half_distance);
  }
  // Reached when the keys differ only by trailing zeros (e.g. "2" and "20").
  throw new Error("no value exists between " + JSON.stringify(low) + " and " +
      JSON.stringify(high) + ": both encode the same number");
}

// Add two parsed numbers ({order_length, digits}) and return the encoded sum.
// Pads the operand of smaller order in place.
function add(a, b) {
  pad_to_equal_order(a, b);
  var result_digits = "";
  var order_length = a.order_length;
  var value = 0;
  // schoolbook addition, least significant digit first
  for (var i = Math.max(a.digits.length, b.digits.length) - 1; i >= 0; i--) {
    value += values[a.digits[i]] || 0;
    value += values[b.digits[i]] || 0;
    result_digits = alphabet[value % radix] + result_digits;
    value = Math.floor(value / radix);
  }
  // overflow up to moar digits
  while (value > 0) {
    result_digits = alphabet[value % radix] + result_digits;
    value = Math.floor(value / radix);
    order_length++;
  }
  return construct(order_length, result_digits);
}

// Split an encoded key into the number of leading order specifiers and the
// digit string that follows them.
function parse(value) {
  var order_length = value.lastIndexOf(order_specifier) + 1;
  return {
    order_length: order_length,
    digits: value.slice(order_length)
  };
}

// Encode order_length and digits as a key string in canonical form.
function construct(order_length, digits) {
  // strip unnecessary leading zeros
  while (order_length > 0 && digits.charAt(0) === "0") {
    digits = digits.slice(1);
    order_length--;
  }
  // Strip trailing zeros of the fraction (everything after the
  // order_length + 1 integer digits). Without this a sum such as
  // "1y" + "02" comes out as "20", which encodes the same number as "2" and
  // leaves no room for a key between the two.
  var end = digits.length;
  while (end > order_length + 1 && digits.charAt(end - 1) === "0") end--;
  digits = digits.slice(0, end);
  var result = "";
  for (var i = 0; i < order_length; i++) result += order_specifier;
  return result + digits;
}

// Left-pad whichever parsed number has the smaller order so that both have
// the same order_length. Mutates its arguments.
function pad_to_equal_order(a, b) {
  pad_in_place(a, b.order_length);
  pad_in_place(b, a.order_length);
}

// Prepend "0" digits to n until it has at least the given order_length.
function pad_in_place(n, order_length) {
  while (n.order_length < order_length) {
    n.digits = "0" + n.digits;
    n.order_length++;
  }
}
var keese = require('./');
var assert = require('assert');

basicTest();
overflowTest();
countTest();

// One key of each kind (initial, bigger, between, smaller) and the ordering
// each of them has to satisfy.
function basicTest() {
  var first = keese(null, null);
  var last = keese(first, null);
  assert(first < last); // forwards
  var inner = keese(first, last);
  assert(first < inner); // between
  assert(inner < last);
  var before = keese(null, first);
  assert(before < first); // backwards
}

// Push the generator through many increments and many nested "between" calls.
function overflowTest() {
  var big_number = testExtremeNext();

  var one = keese();
  var two = keese(one, null);
  var forward_funcs = [
    function() { return true; },
    function() { return false; },
    // arbitrarily descend forwards or backwards pseudo randomly or whatever
    function(i) { return i % 3 === 0 || i % 7 > 3; },
  ];
  var boundses = [
    [one, two],
    [one, big_number],
  ];
  boundses.forEach(function(bounds) {
    forward_funcs.forEach(function(forward_func) {
      testExtremeBetween(bounds[0], bounds[1], forward_func);
    });
  });

  // equal bounds leave no room for a value in between
  assert.throws(function() {
    keese(keese(), keese());
  });

  // Increment 10000 times, checking each step against its predecessor, and
  // return the value that comes after the last one checked.
  function testExtremeNext() {
    var biggest_single_digit;
    var multi_digits = [];
    var previous = null;
    var current = keese();
    for (var step = 0; step < 10000; step++) {
      if (previous !== null) {
        assertLessThan(previous, current);
        testBetween(previous, current);
      }
      if (current.length === 1) {
        biggest_single_digit = current;
      } else if (multi_digits.length < 2) {
        multi_digits.push(current);
      }
      previous = current;
      current = keese(current, null);
    }
    // also go between values on either side of the first magnitude change
    if (multi_digits[1]) {
      testBetween(biggest_single_digit, multi_digits[1]);
    }
    return current;
  }

  function testBetween(a, c) {
    var b = keese(a, c);
    assertLessThan(a, b);
    assertLessThan(b, c);
  }

  // Narrow [lower, upper] 1000 times; forward_func(i) picks which bound moves.
  function testExtremeBetween(lower, upper, forward_func) {
    for (var round = 0; round < 1000; round++) {
      var middle = keese(lower, upper);
      assertLessThan(lower, middle);
      assertLessThan(middle, upper);
      if (forward_func(round)) {
        lower = middle;
      } else {
        upper = middle;
      }
    }
  }

  function assertLessThan(a, b) {
    assert(a < b, JSON.stringify(a) + " < " + JSON.stringify(b));
  }
}

// The count parameter: the right number of ascending values inside the bounds.
function countTest() {
  var some_value = keese();
  runTests(null, null);
  runTests(some_value, null);
  runTests(null, some_value);
  runTests(some_value, keese(some_value));

  function runTests(low, high) {
    [0, 1, 2, 3, 1000].forEach(testSize);

    function testSize(size) {
      var array = keese(low, high, size);
      assert(array.length === size, "array size expected to be: " + size);
      var previous = low;
      for (var i = 0; i < array.length; i++) {
        var item = array[i];
        if (previous != null) {
          assert(previous < item, JSON.stringify(previous) + " < " + JSON.stringify(item));
        }
        previous = item;
      }
      if (previous != null && high != null) {
        assert(previous < high, JSON.stringify(previous) + " < " + JSON.stringify(high));
      }
    }
  }
}