pax_global_header00006660000000000000000000000064141642561740014524gustar00rootroot0000000000000052 comment=c9543426769b60adca60ba9f41e5dd65e561fc97 utf-8-validate-5.0.8/000077500000000000000000000000001416425617400142705ustar00rootroot00000000000000utf-8-validate-5.0.8/.github/000077500000000000000000000000001416425617400156305ustar00rootroot00000000000000utf-8-validate-5.0.8/.github/workflows/000077500000000000000000000000001416425617400176655ustar00rootroot00000000000000utf-8-validate-5.0.8/.github/workflows/ci.yml000066400000000000000000000043241416425617400210060ustar00rootroot00000000000000name: CI on: - push - pull_request jobs: test: runs-on: ${{ matrix.os }} strategy: matrix: arch: - x64 - x86 node: - 12 - 14 - 16 - 17 os: - macos-latest - ubuntu-18.04 - windows-latest exclude: - arch: x86 os: macos-latest - arch: x86 os: ubuntu-18.04 steps: - uses: actions/checkout@v2 - uses: actions/setup-node@v2 with: node-version: ${{ matrix.node }} architecture: ${{ matrix.arch }} - run: npm install - run: npm test - run: npm run prebuild if: matrix.node == 14 && matrix.os != 'macos-latest' && startsWith(github.ref, 'refs/tags/') - run: npm run prebuild-darwin-x64+arm64 if: matrix.node == 14 && matrix.os == 'macos-latest' && startsWith(github.ref, 'refs/tags/') - uses: actions/upload-artifact@v2 if: matrix.node == 14 && startsWith(github.ref, 'refs/tags/') with: name: ${{ matrix.os }} path: prebuilds retention-days: 1 release: if: startsWith(github.ref, 'refs/tags/') needs: test runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/download-artifact@v2 with: path: prebuilds - run: echo ::set-output name=version::$(git describe --tags) id: get_version - run: tar -cvf "${{ steps.get_version.outputs.version }}-darwin-x64+arm64.tar" -C "prebuilds/macos-latest" . - run: tar -cvf "${{ steps.get_version.outputs.version }}-linux-x64.tar" -C "prebuilds/ubuntu-18.04" linux-x64 - run: tar -cvf "${{ steps.get_version.outputs.version }}-win32-ia32.tar" -C "prebuilds/windows-latest" win32-ia32 - run: tar -cvf "${{ steps.get_version.outputs.version }}-win32-x64.tar" -C "prebuilds/windows-latest" win32-x64 - uses: softprops/action-gh-release@v1 with: files: ${{ steps.get_version.outputs.version }}-*.tar env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} utf-8-validate-5.0.8/.gitignore000066400000000000000000000000701416425617400162550ustar00rootroot00000000000000node_modules/ prebuilds/ coverage/ build/ npm-debug.log utf-8-validate-5.0.8/.npmignore000066400000000000000000000000351416425617400162650ustar00rootroot00000000000000build/ test/ appveyor.yml .* utf-8-validate-5.0.8/.npmrc000066400000000000000000000000231416425617400154030ustar00rootroot00000000000000package-lock=false utf-8-validate-5.0.8/LICENSE000066400000000000000000000021121416425617400152710ustar00rootroot00000000000000Copyright (c) 2011 Einar Otto Stangvik (http://2x.io) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. utf-8-validate-5.0.8/README.md000066400000000000000000000022141416425617400155460ustar00rootroot00000000000000# utf-8-validate [![Version npm](https://img.shields.io/npm/v/utf-8-validate.svg?logo=npm)](https://www.npmjs.com/package/utf-8-validate) [![Linux/macOS/Windows Build](https://img.shields.io/github/workflow/status/websockets/utf-8-validate/CI/master?label=build&logo=github)](https://github.com/websockets/utf-8-validate/actions?query=workflow%3ACI+branch%3Amaster) Check if a buffer contains valid UTF-8 encoded text. ## Installation ``` npm install utf-8-validate --save-optional ``` The `--save-optional` flag tells npm to save the package in your package.json under the [`optionalDependencies`](https://docs.npmjs.com/files/package.json#optionaldependencies) key. ## API The module exports a single function which takes one argument. ### `isValidUTF8(buffer)` Checks whether a buffer contains valid UTF-8. #### Arguments - `buffer` - The buffer to check. #### Return value `true` if the buffer contains only correct UTF-8, else `false`. #### Example ```js 'use strict'; const isValidUTF8 = require('utf-8-validate'); const buf = Buffer.from([0xf0, 0x90, 0x80, 0x80]); console.log(isValidUTF8(buf)); // => true ``` ## License [MIT](LICENSE) utf-8-validate-5.0.8/appveyor.yml000066400000000000000000000005171416425617400166630ustar00rootroot00000000000000build: off configuration: Release environment: matrix: - nodejs_version: "16" - nodejs_version: "14" - nodejs_version: "12" platform: - x86 matrix: fast_finish: true install: - ps: Install-Product node $env:nodejs_version $env:platform - npm install test_script: - node --version - npm --version - npm test utf-8-validate-5.0.8/binding.gyp000066400000000000000000000006401416425617400164230ustar00rootroot00000000000000{ 'targets': [ { 'target_name': 'validation', 'sources': ['src/validation.c'], 'cflags': ['-std=c99'], 'conditions': [ ["OS=='mac'", { 'xcode_settings': { 'MACOSX_DEPLOYMENT_TARGET': '10.7', 'OTHER_CFLAGS': ['-arch x86_64', '-arch arm64'], 'OTHER_LDFLAGS': ['-arch x86_64', '-arch arm64'] } }] ] } ] } utf-8-validate-5.0.8/fallback.js000066400000000000000000000031351416425617400163670ustar00rootroot00000000000000'use strict'; /** * Checks if a given buffer contains only correct UTF-8. * Ported from https://www.cl.cam.ac.uk/%7Emgk25/ucs/utf8_check.c by * Markus Kuhn. * * @param {Buffer} buf The buffer to check * @return {Boolean} `true` if `buf` contains only correct UTF-8, else `false` * @public */ function isValidUTF8(buf) { const len = buf.length; let i = 0; while (i < len) { if ((buf[i] & 0x80) === 0x00) { // 0xxxxxxx i++; } else if ((buf[i] & 0xe0) === 0xc0) { // 110xxxxx 10xxxxxx if ( i + 1 === len || (buf[i + 1] & 0xc0) !== 0x80 || (buf[i] & 0xfe) === 0xc0 // overlong ) { return false; } i += 2; } else if ((buf[i] & 0xf0) === 0xe0) { // 1110xxxx 10xxxxxx 10xxxxxx if ( i + 2 >= len || (buf[i + 1] & 0xc0) !== 0x80 || (buf[i + 2] & 0xc0) !== 0x80 || buf[i] === 0xe0 && (buf[i + 1] & 0xe0) === 0x80 || // overlong buf[i] === 0xed && (buf[i + 1] & 0xe0) === 0xa0 // surrogate (U+D800 - U+DFFF) ) { return false; } i += 3; } else if ((buf[i] & 0xf8) === 0xf0) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx if ( i + 3 >= len || (buf[i + 1] & 0xc0) !== 0x80 || (buf[i + 2] & 0xc0) !== 0x80 || (buf[i + 3] & 0xc0) !== 0x80 || buf[i] === 0xf0 && (buf[i + 1] & 0xf0) === 0x80 || // overlong buf[i] === 0xf4 && buf[i + 1] > 0x8f || buf[i] > 0xf4 // > U+10FFFF ) { return false; } i += 4; } else { return false; } } return true; } module.exports = isValidUTF8; utf-8-validate-5.0.8/index.js000066400000000000000000000002101416425617400157260ustar00rootroot00000000000000'use strict'; try { module.exports = require('node-gyp-build')(__dirname); } catch (e) { module.exports = require('./fallback'); } utf-8-validate-5.0.8/package.json000066400000000000000000000016211416425617400165560ustar00rootroot00000000000000{ "name": "utf-8-validate", "version": "5.0.8", "description": "Check if a buffer contains valid UTF-8", "main": "index.js", "engines": { "node": ">=6.14.2" }, "scripts": { "install": "node-gyp-build", "prebuild": "prebuildify --napi --strip", "prebuild-darwin-x64+arm64": "prebuildify --arch x64+arm64 --napi --strip", "test": "mocha" }, "repository": { "type": "git", "url": "https://github.com/websockets/utf-8-validate" }, "keywords": [ "utf-8-validate" ], "author": "Einar Otto Stangvik (http://2x.io)", "license": "MIT", "bugs": { "url": "https://github.com/websockets/utf-8-validate/issues" }, "homepage": "https://github.com/websockets/utf-8-validate", "dependencies": { "node-gyp-build": "^4.3.0" }, "devDependencies": { "mocha": "^9.0.3", "node-gyp": "^7.1.2", "prebuildify": "^5.0.0" } } utf-8-validate-5.0.8/src/000077500000000000000000000000001416425617400150575ustar00rootroot00000000000000utf-8-validate-5.0.8/src/validation.c000066400000000000000000000050351416425617400173600ustar00rootroot00000000000000#define NAPI_VERSION 1 #include #include #include napi_value IsValidUTF8(napi_env env, napi_callback_info info) { napi_status status; size_t argc = 1; napi_value argv[1]; status = napi_get_cb_info(env, info, &argc, argv, NULL, NULL); assert(status == napi_ok); uint8_t *buf; size_t len; status = napi_get_buffer_info(env, argv[0], (void **)&buf, &len); assert(status == napi_ok); size_t i = 0; // // This code has been taken from utf8_check.c which was developed by // Markus Kuhn . // // For original code / licensing please refer to // https://www.cl.cam.ac.uk/%7Emgk25/ucs/utf8_check.c // while (i < len) { size_t j = i + 8; if (j <= len) { // // Read 8 bytes and check if they are ASCII. // uint64_t chunk; memcpy(&chunk, buf + i, 8); if ((chunk & 0x8080808080808080) == 0x00) { i = j; continue; } } while ((buf[i] & 0x80) == 0x00) { // 0xxxxxxx if (++i == len) { goto exit; } } if ((buf[i] & 0xe0) == 0xc0) { // 110xxxxx 10xxxxxx if ( i + 1 == len || (buf[i + 1] & 0xc0) != 0x80 || (buf[i] & 0xfe) == 0xc0 // overlong ) { break; } i += 2; } else if ((buf[i] & 0xf0) == 0xe0) { // 1110xxxx 10xxxxxx 10xxxxxx if ( i + 2 >= len || (buf[i + 1] & 0xc0) != 0x80 || (buf[i + 2] & 0xc0) != 0x80 || (buf[i] == 0xe0 && (buf[i + 1] & 0xe0) == 0x80) || // overlong (buf[i] == 0xed && (buf[i + 1] & 0xe0) == 0xa0) // surrogate (U+D800 - U+DFFF) ) { break; } i += 3; } else if ((buf[i] & 0xf8) == 0xf0) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx if ( i + 3 >= len || (buf[i + 1] & 0xc0) != 0x80 || (buf[i + 2] & 0xc0) != 0x80 || (buf[i + 3] & 0xc0) != 0x80 || (buf[i] == 0xf0 && (buf[i + 1] & 0xf0) == 0x80) || // overlong (buf[i] == 0xf4 && buf[i + 1] > 0x8f) || buf[i] > 0xf4 // > U+10FFFF ) { break; } i += 4; } else { break; } } exit:; napi_value result; status = napi_get_boolean(env, i == len, &result); assert(status == napi_ok); return result; } napi_value Init(napi_env env, napi_value exports) { napi_status status; napi_value isValidUTF8; status = napi_create_function(env, NULL, 0, IsValidUTF8, NULL, &isValidUTF8); assert(status == napi_ok); return isValidUTF8; } NAPI_MODULE(NODE_GYP_MODULE_NAME, Init) utf-8-validate-5.0.8/test/000077500000000000000000000000001416425617400152475ustar00rootroot00000000000000utf-8-validate-5.0.8/test/fixtures/000077500000000000000000000000001416425617400171205ustar00rootroot00000000000000utf-8-validate-5.0.8/test/fixtures/lorem-ipsum.txt000066400000000000000000000014731416425617400221370ustar00rootroot00000000000000Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque gravida mattis rhoncus. Donec iaculis, metus quis varius accumsan, erat mauris condimentum diam, et egestas erat enim ut ligula. Praesent sollicitudin tellus eget dolor euismod euismod. Nullam ac augue nec neque varius luctus. Curabitur elit mi, consequat ultricies adipiscing mollis, scelerisque in erat. Phasellus facilisis fermentum ullamcorper. Nulla et sem eu arcu pharetra pellentesque. Praesent consectetur tempor justo, vel iaculis dui ullamcorper sit amet. Integer tristique viverra ullamcorper. Vivamus laoreet, nulla eget suscipit eleifend, lacus lectus feugiat libero, non fermentum erat nisi at risus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut pulvinar dignissim tellus, eu dignissim lorem vulputate quis. Morbi ut pulvinar augue. utf-8-validate-5.0.8/test/test.js000066400000000000000000000026001416425617400165620ustar00rootroot00000000000000'use strict'; const { join } = require('path'); const { readFileSync } = require('fs'); const { strictEqual } = require('assert'); const txt = readFileSync(join(__dirname, 'fixtures', 'lorem-ipsum.txt')); function use(isValidUTF8) { return function () { it('returns true with an empty buffer', function () { strictEqual(isValidUTF8(Buffer.alloc(0)), true); }); it('returns true for a valid utf8 string', function () { strictEqual(isValidUTF8(Buffer.from(txt)), true); }); it('returns false for an erroneous string', function () { var invalid = Buffer.from([ 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0xed, 0xa0, 0x80, 0x65, 0x64, 0x69, 0x74, 0x65, 0x64 ]); strictEqual(isValidUTF8(invalid), false); }); it('returns true for valid cases from the autobahn test suite', function () { strictEqual( isValidUTF8(Buffer.from('\xf0\x90\x80\x80')), true ); strictEqual( isValidUTF8(Buffer.from([0xf0, 0x90, 0x80, 0x80])), true ); }); it('returns false for erroneous autobahn strings', function () { strictEqual( isValidUTF8(Buffer.from([0xce, 0xba, 0xe1, 0xbd])), false ); }); }; } describe('bindings', use(require('node-gyp-build')(join(__dirname, '..')))); describe('fallback', use(require('../fallback')));