pax_global_header00006660000000000000000000000064124320335330014510gustar00rootroot0000000000000052 comment=26d6bb6e1e5f50152d683b6f806351dea61868e6 wcwidth.js-1.0.0/000077500000000000000000000000001243203353300135725ustar00rootroot00000000000000wcwidth.js-1.0.0/.gitignore000066400000000000000000000000161243203353300155570ustar00rootroot00000000000000node_modules/ wcwidth.js-1.0.0/.npmignore000066400000000000000000000000571243203353300155730ustar00rootroot00000000000000.hg .hgtags .hgignore .npmignore node_modules/ wcwidth.js-1.0.0/INSTALL.md000066400000000000000000000007031243203353300152220ustar00rootroot00000000000000How to build and install wcwidth.js =================================== This package does not provide an automated way to build or install the library except using [`npm`](http://npmjs.org/package/wcwidth.js) because `wcwidth.js` is intended to run on top of [`node.js`](http://nodejs.org). If you have `node.js` on your system, npm install wcwidth.js brings the latest version of `wcwidth.js` and installs it along with all of its dependencies. wcwidth.js-1.0.0/LICENSE.md000066400000000000000000000030211243203353300151720ustar00rootroot00000000000000wcwidth.js: a javascript porting of C's wcwidth() ================================================= Copyright (C) 2012-2014 by Jun Woong and Tim Oxley. This package is a javascript porting of the `wcwidth()` implementation [by Markus Kuhn](http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c). 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. wcwidth.js-1.0.0/NEWS000066400000000000000000000005341243203353300142730ustar00rootroot00000000000000What's new in wcwidth.js? 
========================= 2014-11-16 1.0.0 released 2014-09-18 Restructured code and added tests by Tim Oxley 2014-04-26 0.0.4 released 2012-12-13 0.0.3 released 2012-12-12 Moved the repository to git/github 2012-10-16 0.0.2 released 2012-10-16 Using `wcwidth` without invoking it checked 2012-10-12 First release wcwidth.js-1.0.0/README.md000066400000000000000000000103001243203353300150430ustar00rootroot00000000000000wcwidth.js: a javascript porting of C's wcwidth() ================================================= `wcwidth.js` is a simple javascript porting of `wcwidth()` implemented in C [by Markus Kuhn](http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c). [`wcwidth()`](http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html) and its string version, [`wcswidth()`](http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html) are defined by IEEE Std 1003.1-2001, a.k.a. POSIX.1-2001, and return the number of columns used to represent a wide character and string on fixed-width output devices like terminals. Markus's implementation assumes wide characters to be encoded in [ISO 10646](http://en.wikipedia.org/wiki/Universal_Character_Set), which is _almost_ true for JavaScript; _almost_ because JavaScript uses [UCS-2](http://en.wikipedia.org/wiki/UTF-16) and has problems with surrogate pairs. `wcwidth.js` converts surrogate pairs to Unicode code points to handle them correctly. 
Following the original implementation, this library defines the column width of an ISO 10646 character as follows: - the null character (`U+0000`) has a column width of `opts.nul` (whose default value is 0); - other [C0/C1 control characters](http://en.wikipedia.org/wiki/C0_and_C1_control_codes) and `DEL` will lead to a column width of `opts.control` (whose default value is 0); - non-spacing and enclosing combining characters ([general category code](http://www.unicode.org/reports/tr44/#GC_Values_Table) `Mn` or `Me` in the Unicode database) have a column width of 0; - `SOFT HYPHEN` (`U+00AD`) has a column width of 1; - other format characters (general category code `Cf` in the Unicode database) and `ZERO WIDTH SPACE` (`U+200B`) have a column width of 0; - Hangul Jamo medial vowels and final consonants (`U+1160`-`U+11FF`) have a column width of 0; - spacing characters in the East Asian Wide (`W`) or East Asian Full-width (`F`) category as defined in [Unicode Technical Report #11](http://www.unicode.org/reports/tr11/) have a column width of 2; and - all remaining characters (including all printable [ISO 8859-1](http://en.wikipedia.org/wiki/ISO/IEC_8859-1) and [WGL4 characters](http://en.wikipedia.org/wiki/Windows_Glyph_List_4), Unicode control characters, etc.) have a column width of 1. A surrogate high or low value which constitutes no pair is considered to have a column width of 1 according to the behavior of widespread terminals. See the [documentation](https://github.com/mycoboco/wcwidth.js/blob/master/doc/index.md) from the C implementation for details. 
`wcwidth.js` is simple to use: var wcwidth = require('wcwidth.js') wcwidth('한글') // 4 wcwidth('\0') // 0; NUL wcwidth('\t') // 0; control characters If you plan to replace `NUL` or control characters with, say, `???` before printing, use `wcwidth.config()` that returns a closure to run `wcwidth` with your configuration: var mywidth = wcwidth.config({ nul: 3, control: 3 }) mywidth('\0\f') // 6 mywidth('한\t글') // 7 Setting these options to -1 gives a function that returns -1 for a string containing an instance of `NUL` or control characters: mywidth = wcwidth.config({ nul: 0, control: -1 }) mywidth('java\0script') // 10 mywidth('java\tscript') // -1 This is useful when detecting if a string has non-printable characters. Due to the risk of monkey-patching, the `String` getter is no longer provided. Even if discouraged, you can still monkey-patch by yourself as follows: String.prototype.__defineGetter__('wcwidth', function () { return wcwidth(this); }) '한글'.wcwidth // 4 JavaScript has no character type, thus meaningless to have two versions of `wcwidth` while POSIX does for C. `wcwidth` also accepts a code value obtained by `charCodeAt()`: wcwidth('한') // prints 2 wcwidth('글'.charCodeAt(0)) // prints 2 `INSTALL.md` explains how to build and install the library. For the copyright issues, see the accompanying `LICENSE.md` file. If you have a question or suggestion, do not hesitate to contact me via email (woong.jun at gmail.com) or web (http://code.woong.org/). 
/*
 *  look-up table for non-spacing characters
 *
 *  Each entry is an inclusive [ first, last ] range of code points that
 *  occupy no column. The entries are listed in ascending order and do not
 *  overlap; the binary search in index.js depends on that ordering, so keep
 *  it intact when adding ranges.
 *
 *  Besides combining marks, the table also carries the Hangul Jamo medial
 *  vowels and final consonants (U+1160-U+11FF) and ZERO WIDTH SPACE
 *  (U+200B), which are likewise given a width of 0.
 */

module.exports = [
    [ 0x0300, 0x036F ],   [ 0x0483, 0x0486 ],   [ 0x0488, 0x0489 ],
    [ 0x0591, 0x05BD ],   [ 0x05BF, 0x05BF ],   [ 0x05C1, 0x05C2 ],
    [ 0x05C4, 0x05C5 ],   [ 0x05C7, 0x05C7 ],   [ 0x0600, 0x0603 ],
    [ 0x0610, 0x0615 ],   [ 0x064B, 0x065E ],   [ 0x0670, 0x0670 ],
    [ 0x06D6, 0x06E4 ],   [ 0x06E7, 0x06E8 ],   [ 0x06EA, 0x06ED ],
    [ 0x070F, 0x070F ],   [ 0x0711, 0x0711 ],   [ 0x0730, 0x074A ],
    [ 0x07A6, 0x07B0 ],   [ 0x07EB, 0x07F3 ],   [ 0x0901, 0x0902 ],
    [ 0x093C, 0x093C ],   [ 0x0941, 0x0948 ],   [ 0x094D, 0x094D ],
    [ 0x0951, 0x0954 ],   [ 0x0962, 0x0963 ],   [ 0x0981, 0x0981 ],
    [ 0x09BC, 0x09BC ],   [ 0x09C1, 0x09C4 ],   [ 0x09CD, 0x09CD ],
    [ 0x09E2, 0x09E3 ],   [ 0x0A01, 0x0A02 ],   [ 0x0A3C, 0x0A3C ],
    [ 0x0A41, 0x0A42 ],   [ 0x0A47, 0x0A48 ],   [ 0x0A4B, 0x0A4D ],
    [ 0x0A70, 0x0A71 ],   [ 0x0A81, 0x0A82 ],   [ 0x0ABC, 0x0ABC ],
    [ 0x0AC1, 0x0AC5 ],   [ 0x0AC7, 0x0AC8 ],   [ 0x0ACD, 0x0ACD ],
    [ 0x0AE2, 0x0AE3 ],   [ 0x0B01, 0x0B01 ],   [ 0x0B3C, 0x0B3C ],
    [ 0x0B3F, 0x0B3F ],   [ 0x0B41, 0x0B43 ],   [ 0x0B4D, 0x0B4D ],
    [ 0x0B56, 0x0B56 ],   [ 0x0B82, 0x0B82 ],   [ 0x0BC0, 0x0BC0 ],
    [ 0x0BCD, 0x0BCD ],   [ 0x0C3E, 0x0C40 ],   [ 0x0C46, 0x0C48 ],
    [ 0x0C4A, 0x0C4D ],   [ 0x0C55, 0x0C56 ],   [ 0x0CBC, 0x0CBC ],
    [ 0x0CBF, 0x0CBF ],   [ 0x0CC6, 0x0CC6 ],   [ 0x0CCC, 0x0CCD ],
    [ 0x0CE2, 0x0CE3 ],   [ 0x0D41, 0x0D43 ],   [ 0x0D4D, 0x0D4D ],
    [ 0x0DCA, 0x0DCA ],   [ 0x0DD2, 0x0DD4 ],   [ 0x0DD6, 0x0DD6 ],
    [ 0x0E31, 0x0E31 ],   [ 0x0E34, 0x0E3A ],   [ 0x0E47, 0x0E4E ],
    [ 0x0EB1, 0x0EB1 ],   [ 0x0EB4, 0x0EB9 ],   [ 0x0EBB, 0x0EBC ],
    [ 0x0EC8, 0x0ECD ],   [ 0x0F18, 0x0F19 ],   [ 0x0F35, 0x0F35 ],
    [ 0x0F37, 0x0F37 ],   [ 0x0F39, 0x0F39 ],   [ 0x0F71, 0x0F7E ],
    [ 0x0F80, 0x0F84 ],   [ 0x0F86, 0x0F87 ],   [ 0x0F90, 0x0F97 ],
    [ 0x0F99, 0x0FBC ],   [ 0x0FC6, 0x0FC6 ],   [ 0x102D, 0x1030 ],
    [ 0x1032, 0x1032 ],   [ 0x1036, 0x1037 ],   [ 0x1039, 0x1039 ],
    [ 0x1058, 0x1059 ],   [ 0x1160, 0x11FF ],   [ 0x135F, 0x135F ],
    [ 0x1712, 0x1714 ],   [ 0x1732, 0x1734 ],   [ 0x1752, 0x1753 ],
    [ 0x1772, 0x1773 ],   [ 0x17B4, 0x17B5 ],   [ 0x17B7, 0x17BD ],
    [ 0x17C6, 0x17C6 ],   [ 0x17C9, 0x17D3 ],   [ 0x17DD, 0x17DD ],
    [ 0x180B, 0x180D ],   [ 0x18A9, 0x18A9 ],   [ 0x1920, 0x1922 ],
    [ 0x1927, 0x1928 ],   [ 0x1932, 0x1932 ],   [ 0x1939, 0x193B ],
    [ 0x1A17, 0x1A18 ],   [ 0x1B00, 0x1B03 ],   [ 0x1B34, 0x1B34 ],
    [ 0x1B36, 0x1B3A ],   [ 0x1B3C, 0x1B3C ],   [ 0x1B42, 0x1B42 ],
    [ 0x1B6B, 0x1B73 ],   [ 0x1DC0, 0x1DCA ],   [ 0x1DFE, 0x1DFF ],
    [ 0x200B, 0x200F ],   [ 0x202A, 0x202E ],   [ 0x2060, 0x2063 ],
    [ 0x206A, 0x206F ],   [ 0x20D0, 0x20EF ],   [ 0x302A, 0x302F ],
    [ 0x3099, 0x309A ],   [ 0xA806, 0xA806 ],   [ 0xA80B, 0xA80B ],
    [ 0xA825, 0xA826 ],   [ 0xFB1E, 0xFB1E ],   [ 0xFE00, 0xFE0F ],
    [ 0xFE20, 0xFE23 ],   [ 0xFEFF, 0xFEFF ],   [ 0xFFF9, 0xFFFB ],
    [ 0x10A01, 0x10A03 ], [ 0x10A05, 0x10A06 ], [ 0x10A0C, 0x10A0F ],
    [ 0x10A38, 0x10A3A ], [ 0x10A3F, 0x10A3F ], [ 0x1D167, 0x1D169 ],
    [ 0x1D173, 0x1D182 ], [ 0x1D185, 0x1D18B ], [ 0x1D1AA, 0x1D1AD ],
    [ 0x1D242, 0x1D244 ], [ 0xE0001, 0xE0001 ], [ 0xE0020, 0xE007F ],
    [ 0xE0100, 0xE01EF ]
]

// end of combining.js
In fixed-width output devices, Latin characters all occupy a single "cell" position of equal width, whereas [ideographic CJK characters](http://en.wikipedia.org/wiki/CJK_Unified_Ideographs) occupy two such cells. Interoperability between terminal-line applications and (teletype-style) character terminals using the [UTF-8 encoding](http://en.wikipedia.org/wiki/UTF-8) requires agreement on which character should advance the cursor by how many cell positions. No established formal standards exist at present on which Unicode character shall occupy how many cell positions on character terminals. These routines are a first attempt of defining such behavior based on simple rules applied to data provided by the [Unicode Consortium](http://www.unicode.org/). For some graphical characters, the Unicode standard explicitly defines a character-cell width via the definition of the East Asian FullWidth (`F`), Wide (`W`), Half-width (`H`), and Narrow (`Na`) classes. In all these cases, there is no ambiguity about which width a terminal shall use. For characters in the East Asian Ambiguous (`A`) class, the width choice depends purely on a preference of backward compatibility with either historic CJK or Western practice. Choosing single-width for these characters is easy to justify as the appropriate long-term solution, as the CJK practice of displaying these characters as double-width comes from historic implementation simplicity (8-bit encoded characters were displayed single-width and 16-bit ones double-width, even for Greek, Cyrillic, etc.) and not any typographic considerations. Much less clear is the choice of width for the Not East Asian (Neutral) class. Existing practice does not dictate a width for any of these characters. It would nevertheless make sense typographically to allocate two character cells to characters such as for instance `EM SPACE` or `VOLUME INTEGRAL`, which cannot be represented adequately with a single-width glyph. 
The following routines at present merely assign a single-cell width to all neutral characters, in the interest of simplicity. This is not entirely satisfactory and should be reconsidered before establishing a formal standard in this area. At the moment, the decision which Not East Asian (Neutral) characters should be represented by double-width glyphs cannot yet be answered by applying a simple rule from the Unicode database content. Setting up a proper standard for the behavior of UTF-8 character terminals will require a careful analysis not only of each Unicode character, but also of each presentation form, something the author of these routines has avoided to do so far. [http://www.unicode.org/unicode/reports/tr11/](http://www.unicode.org/unicode/reports/tr11/) _Markus Kuhn_ -- 2007-05-26 (Unicode 5.0) wcwidth.js-1.0.0/index.js000066400000000000000000000046121243203353300152420ustar00rootroot00000000000000/* * wcwidth.js: a javascript porting of Markus Kuhn's wcwidth() */ "use strict" var defaults = require('defaults') var combining = require('./combining') var DEFAULTS = { nul: 0, control: 0 } function bisearch(ucs) { var min = 0 var max = combining.length - 1 var mid if (ucs < combining[0][0] || ucs > combining[max][1]) return false while (max >= min) { mid = Math.floor((min + max) / 2) if (ucs > combining[mid][1]) min = mid + 1 else if (ucs < combining[mid][0]) max = mid - 1 else return true } return false } function wcwidth(ucs, opts) { // test for 8-bit control characters if (ucs === 0) return opts.nul if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) return opts.control // binary search in table of non-spacing characters if (bisearch(ucs)) return 0 // if we arrive here, ucs is not a combining or C0/C1 control character return 1 + (ucs >= 0x1100 && (ucs <= 0x115f || // Hangul Jamo init. consonants ucs == 0x2329 || ucs == 0x232a || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) || // CJK ... 
Yi (ucs >= 0xac00 && ucs <= 0xd7a3) || // Hangul Syllables (ucs >= 0xf900 && ucs <= 0xfaff) || // CJK Compatibility Ideographs (ucs >= 0xfe10 && ucs <= 0xfe19) || // Vertical forms (ucs >= 0xfe30 && ucs <= 0xfe6f) || // CJK Compatibility Forms (ucs >= 0xff00 && ucs <= 0xff60) || // Fullwidth Forms (ucs >= 0xffe0 && ucs <= 0xffe6) || (ucs >= 0x20000 && ucs <= 0x2fffd) || (ucs >= 0x30000 && ucs <= 0x3fffd))); } function wcswidth(str, opts) { var h, l var s = 0, n if (typeof str !== 'string') return wcwidth(str, opts) for (var i = 0; i < str.length; i++) { h = str.charCodeAt(i) if (h >= 0xd800 && h <= 0xdbff) { l = str.charCodeAt(++i) if (l >= 0xdc00 && l <= 0xdfff) h = (h-0xd800)*0x400 + (l-0xdc00)+0x10000 else i-- } n = wcwidth(h, opts) if (n < 0) return -1 s += n } return s } module.exports = function wcwidth(str) { return wcswidth(str, DEFAULTS) } module.exports.config = function(opts) { opts = defaults(opts || {}, DEFAULTS) return function wcwidth(str) { return wcswidth(str, opts) } } // end of wcwidth.js wcwidth.js-1.0.0/package.json000066400000000000000000000020211243203353300160530ustar00rootroot00000000000000{ "name": "wcwidth.js", "version": "1.0.0", "description": "a javascript porting of C's wcwidth()", "author": { "name": "Woong Jun", "email": "woong.jun@gmail.com", "url": "http://code.woong.org/" }, "contributors": [ { "name": "Tim Oxley", "email": "secoif@gmail.com", "url": "http://campjs.com/" } ], "homepage": "http://code.woong.org/wcwidth.js", "repository": { "type": "git", "url": "https://github.com/mycoboco/wcwidth.js.git" }, "bugs": { "url": "https://github.com/mycoboco/wcwidth.js/issues", "email": "woong.jun@gmail.com" }, "main": "index.js", "dependencies": { "defaults": "^1.0.0" }, "devDependencies": { "tape": "^2.13.4" }, "engines": { "node": ">=0.8.0" }, "licenses": "MIT", "keywords": [ "wide character", "wc", "wide character string", "wcs", "terminal", "width", "wcwidth", "wcswidth" ], "directories": { "doc": "doc", "test": "test" }, 
/*
 *  test cases for wcwidth
 *
 *  Every case runs against the package's main export; the cases that need
 *  custom widths for NUL or control characters go through wcwidth.config().
 */

"use strict"

var wcwidth = require('../')
var test = require('tape')

// plain ASCII: one column per character
test('handles regular strings', function (t) {
    t.strictEqual(wcwidth('abc'), 3)
    t.end()
})

// nine wide characters, two columns each
test('handles wide strings', function (t) {
    t.strictEqual(wcwidth('한글字的模块テスト'), 18)
    t.end()
})

// four narrow characters (including the space) plus the nine wide ones
test('handles wide characters mixed with regular characters', function (t) {
    t.strictEqual(wcwidth('abc 한글字的模块テスト'), 22)
    t.end()
})

// initial consonants (U+1100-U+115F) are wide, while medial vowels and final
// consonants (U+1160-U+11FF) are zero-width, so a conjoining sequence is as
// wide as its initial consonant alone
test('handles Hangul Jamos', function (t) {
    t.strictEqual(wcwidth('\u1100\u1175'), 2)    // 기
    t.strictEqual(wcwidth('\u1112\u1161\u11ab'), 2)    // 한
    t.strictEqual(wcwidth('\u1100\u1160\u11ab'), 2)    // JUNGSEONG FILLER
    t.strictEqual(wcwidth('\u115f\u1161'), 2)    // CHOSEONG FILLER
    t.strictEqual(wcwidth('\u115f\u11ab'), 2)    // CHOSEONG FILLER
    t.strictEqual(wcwidth('\u115f\u1160\u11ab'), 2)    // CHO/JUNGSEONG FILLER
    t.strictEqual(wcwidth('\u115f\u1161\u11ab'), 2)    // CHOSEONG FILLER
    t.strictEqual(wcwidth('\u1161'), 0)    // incomplete
    t.strictEqual(wcwidth('\u11ab'), 0)    // incomplete
    t.strictEqual(wcwidth('\u1161\u11ab'), 0)    // incomplete
    t.strictEqual(wcwidth('\u1160\u11ab'), 0)    // incomplete
    t.strictEqual(wcwidth('듀ᇰ'), 2)
    t.end()
})

// a valid pair counts as one character: the U+1D4xx letters are narrow and
// the U+20000-U+2FFFD ideographs are wide
test('handle surrogate pairs', function (t) {
    t.strictEqual(wcwidth('\ud835\udca5\ud835\udcc8'), 2)
    t.strictEqual(wcwidth('𝒥𝒶𝓋𝒶𝓈𝒸𝓇𝒾𝓅𝓉'), 10)
    t.strictEqual(wcwidth('\ud840\udc34\ud840\udd58'), 4)
    t.end()
})

// a high or low surrogate that forms no pair takes one column by itself
test('invalid sequences with surrogate high/low values', function (t) {
    t.strictEqual(wcwidth('\ud835\u0065'), 2)
    t.strictEqual(wcwidth('\u0065\udcc8'), 2)
    t.strictEqual(wcwidth('a\ud835\u0065\u0065\udcc8z'), 6)
    t.end()
})

// control characters have a width of 0 by default: 3 + 18 + 3 columns remain
test('ignores control characters e.g. \\n', function (t) {
    t.strictEqual(wcwidth('abc\t한글字的模块テスト\ndef'), 24)
    t.end()
})

// an empty string and non-string arguments are expected to measure 0; the
// number 3 is taken as a code value, which is a control character
test('ignores bad input', function (t) {
    t.strictEqual(wcwidth(''), 0)
    t.strictEqual(wcwidth(3), 0)
    t.strictEqual(wcwidth({}), 0)
    t.strictEqual(wcwidth([]), 0)
    t.strictEqual(wcwidth(), 0)
    t.end()
})

// NUL has a width of 0 by default
test('ignores NUL', function (t) {
    t.strictEqual(wcwidth(String.fromCharCode(0)), 0)
    t.strictEqual(wcwidth('\0'), 0)
    t.end()
})

// only 'a' and the two wide characters contribute: 1 + 2 * 2
test('ignores NUL mixed with chars', function (t) {
    t.strictEqual(wcwidth('a' + String.fromCharCode(0) + '\n字的'), 5)
    t.strictEqual(wcwidth('a\0\n한글'), 5)
    t.end()
})

// the configured width for NUL is added to the 5 columns of the rest
test('can have custom value for NUL', function (t) {
    t.strictEqual(wcwidth.config({
        nul: 10
    })(String.fromCharCode(0) + 'a字的'), 15)
    t.strictEqual(wcwidth.config({
        nul: 3
    })('\0' + 'a한글'), 8)
    t.end()
})

// each of the two newlines now counts as one column: 24 + 2
test('can have custom control char value', function (t) {
    t.strictEqual(wcwidth.config({
        control: 1
    })('abc\n한글字的模块テスト\ndef'), 26)
    t.end()
})

// a negative width for control characters makes the whole result -1
test('negative custom control chars == -1', function (t) {
    t.strictEqual(wcwidth.config({
        control: -1
    })('abc\n한글字的模块テスト\ndef'), -1)
    t.end()
})

// likewise for a negative width configured for NUL
test('negative custom value for NUL == -1', function (t) {
    t.strictEqual(wcwidth.config({
        nul: -1
    })('abc\n한글字的模块テスト\0def'), -1)
    t.end()
})

// end of test cases