pax_global_header 0000666 0000000 0000000 00000000064 14120103741 0014503 g ustar 00root root 0000000 0000000 52 comment=ebf17f04db64a20ce74902c18144e915b82183b9
regexpu-core-4.8.0/ 0000775 0000000 0000000 00000000000 14120103741 0014121 5 ustar 00root root 0000000 0000000 regexpu-core-4.8.0/.editorconfig 0000664 0000000 0000000 00000000301 14120103741 0016570 0 ustar 00root root 0000000 0000000 root = true
[*]
charset = utf-8
indent_style = tab
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[{README.md,.travis.yml}]
indent_style = space
indent_size = 2
regexpu-core-4.8.0/.gitattributes 0000664 0000000 0000000 00000000114 14120103741 0017010 0 ustar 00root root 0000000 0000000 # Automatically normalize line endings for all text-based files
* text=auto
regexpu-core-4.8.0/.github/ 0000775 0000000 0000000 00000000000 14120103741 0015461 5 ustar 00root root 0000000 0000000 regexpu-core-4.8.0/.github/workflows/ 0000775 0000000 0000000 00000000000 14120103741 0017516 5 ustar 00root root 0000000 0000000 regexpu-core-4.8.0/.github/workflows/main.yml 0000664 0000000 0000000 00000002017 14120103741 0021165 0 ustar 00root root 0000000 0000000 name: run-checks
on:
push:
branches:
- main
pull_request:
branches:
- main
jobs:
publish:
runs-on: ubuntu-latest
strategy:
matrix:
# Include all major maintenance + active LTS + current Node.js versions.
# https://github.com/nodejs/Release#release-schedule
node: [12, 14, 16]
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set up Node.js 16
uses: actions/setup-node@v1
with:
# Always build using the same Node.js version, to ensure consistent
# results from scripts/iu-mappings.js.
# This version should be the same as the one used in the
# publish-on-tag workflow.
node-version: 16
- name: Install dependencies
run: npm install
- name: Build
run: npm run build
- name: Set up Node.js ${{ matrix.node }}
uses: actions/setup-node@v1
with:
node-version: ${{ matrix.node }}
- name: Test
run: npm test
regexpu-core-4.8.0/.github/workflows/publish-on-tag.yml 0000664 0000000 0000000 00000001675 14120103741 0023103 0 ustar 00root root 0000000 0000000 name: publish-on-tag
on:
push:
tags:
- '*'
jobs:
publish:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set up Node.js 16
uses: actions/setup-node@v1
with:
# Always build using the same Node.js version, to ensure consistent
# results from scripts/iu-mappings.js.
# This version should be the same as the one used in the main workflow.
node-version: 16
- name: Install dependencies
run: npm install
- name: Build
run: npm run build
- name: Test
run: npm test
- name: Publish
env:
NPM_TOKEN: ${{secrets.NPM_TOKEN}}
run: |
npm config set access public
npm config set registry 'https://wombat-dressing-room.appspot.com/'
npm config set '//wombat-dressing-room.appspot.com/:_authToken' '${NPM_TOKEN}'
npm publish
regexpu-core-4.8.0/.gitignore 0000664 0000000 0000000 00000000347 14120103741 0016115 0 ustar 00root root 0000000 0000000 # Coverage report
coverage
# Installed npm modules
node_modules
# Folder view configuration files
.DS_Store
Desktop.ini
# Thumbnail cache files
._*
Thumbs.db
# Files that might appear on external disks
.Spotlight-V100
.Trashes
regexpu-core-4.8.0/.npmrc 0000664 0000000 0000000 00000000023 14120103741 0015234 0 ustar 00root root 0000000 0000000 package-lock=false
regexpu-core-4.8.0/LICENSE-MIT.txt 0000664 0000000 0000000 00000002065 14120103741 0016376 0 ustar 00root root 0000000 0000000 Copyright Mathias Bynens
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
regexpu-core-4.8.0/README.md 0000664 0000000 0000000 00000012512 14120103741 0015401 0 ustar 00root root 0000000 0000000 # regexpu-core [](https://github.com/mathiasbynens/regexpu-core/actions?query=workflow%3Arun-checks) [](https://www.npmjs.com/package/regexpu-core)
_regexpu_ is a source code transpiler that enables the use of ES2015 Unicode regular expressions in JavaScript-of-today (ES5).
_regexpu-core_ contains _regexpu_’s core functionality, i.e. `rewritePattern(pattern, flags, options)`, which enables rewriting regular expressions that make use of [the ES2015 `u` flag](https://mathiasbynens.be/notes/es6-unicode-regex) into equivalent ES5-compatible regular expression patterns.
## Installation
To use _regexpu-core_ programmatically, install it as a dependency via [npm](https://www.npmjs.com/):
```bash
npm install regexpu-core --save
```
Then, `require` it:
```js
const rewritePattern = require('regexpu-core');
```
## API
This module exports a single function named `rewritePattern`.
### `rewritePattern(pattern, flags, options)`
This function takes a string that represents a regular expression pattern as well as a string representing its flags, and returns an ES5-compatible version of the pattern.
```js
rewritePattern('foo.bar', 'u');
// → 'foo(?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\uDC00-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF])bar'
rewritePattern('[\\u{1D306}-\\u{1D308}a-z]', 'u');
// → '(?:[a-z]|\\uD834[\\uDF06-\\uDF08])'
rewritePattern('[\\u{1D306}-\\u{1D308}a-z]', 'ui');
// → '(?:[a-z\\u017F\\u212A]|\\uD834[\\uDF06-\\uDF08])'
```
_regexpu-core_ can rewrite non-ES6 regular expressions too, which is useful to demonstrate how their behavior changes once the `u` and `i` flags are added:
```js
// In ES5, the dot operator only matches BMP symbols:
rewritePattern('foo.bar');
// → 'foo(?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uFFFF])bar'
// But with the ES2015 `u` flag, it matches astral symbols too:
rewritePattern('foo.bar', 'u');
// → 'foo(?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\uDC00-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF])bar'
```
The optional `options` argument recognizes the following properties:
#### `dotAllFlag` (default: `false`)
Setting this option to `true` enables support for [the `s` (`dotAll`) flag](https://github.com/mathiasbynens/es-regexp-dotall-flag).
```js
rewritePattern('.');
// → '[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uFFFF]'
rewritePattern('.', '', {
'dotAllFlag': true
});
// → '[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uFFFF]'
rewritePattern('.', 's', {
'dotAllFlag': true
});
// → '[\\0-\\uFFFF]'
rewritePattern('.', 'su', {
'dotAllFlag': true
});
// → '(?:[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])'
```
#### `unicodePropertyEscape` (default: `false`)
Setting this option to `true` enables [support for Unicode property escapes](property-escapes.md):
```js
rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', {
'unicodePropertyEscape': true
});
// → '(?:\\uD811[\\uDC00-\\uDE46])'
```
#### `lookbehind` (default: `false`)
Setting this option to `true` enables support for [lookbehind assertions](https://github.com/tc39/proposal-regexp-lookbehind).
```js
rewritePattern('(?<=.)a', '', {
'lookbehind': true
});
// → '(?<=[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uFFFF])a'
```
#### `namedGroup` (default: `false`)
Setting this option to `true` enables support for [named capture groups](https://github.com/tc39/proposal-regexp-named-groups).
```js
rewritePattern('(?<name>.)\\k<name>', '', {
'namedGroup': true
});
// → '(.)\\1'
```
#### `onNamedGroup`
This option is a function that gets called when a named capture group is found. It receives two parameters:
the name of the group, and its index.
```js
rewritePattern('(?<name>.)\\k<name>', '', {
'namedGroup': true,
onNamedGroup(name, index) {
console.log(name, index);
// → 'name', 1
}
});
```
#### `useUnicodeFlag` (default: `false`)
Setting this option to `true` enables the use of Unicode code point escapes of the form `\u{…}`. Note that in regular expressions, such escape sequences only work correctly when the ES2015 `u` flag is set. Enabling this setting often results in more compact output, although there are cases (such as `\p{Lu}`) where it actually _increases_ the output size.
```js
rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', {
'unicodePropertyEscape': true,
'useUnicodeFlag': true
});
// → '[\\u{14400}-\\u{14646}]'
```
## For maintainers
### How to publish a new release
1. On the `main` branch, bump the version number in `package.json`:
```sh
npm version patch -m 'Release v%s'
```
Instead of `patch`, use `minor` or `major` [as needed](https://semver.org/).
Note that this produces a Git commit + tag.
1. Push the release commit and tag:
```sh
git push && git push --tags
```
Our CI then automatically publishes the new release to npm.
## Author
| [![twitter/mathias](https://gravatar.com/avatar/24e08a9ea84deb17ae121074d0f17125?s=70)](https://twitter.com/mathias "Follow @mathias on Twitter") |
|---|
| [Mathias Bynens](https://mathiasbynens.be/) |
## License
_regexpu-core_ is available under the [MIT](https://mths.be/mit) license.
regexpu-core-4.8.0/data/ 0000775 0000000 0000000 00000000000 14120103741 0015032 5 ustar 00root root 0000000 0000000 regexpu-core-4.8.0/data/character-class-escape-sets.js 0000664 0000000 0000000 00000005212 14120103741 0022641 0 ustar 00root root 0000000 0000000 // Generated using `npm run build`. Do not edit.
'use strict';
const regenerate = require('regenerate');
// REGULAR: a Map from the character class escape letters (d, D, s, S, w, W)
// to `regenerate` code point sets. Every set in this map is bounded by 0xFFFF.
// Each uppercase key holds exactly the code points in 0x0–0xFFFF that its
// lowercase counterpart leaves out.
// NOTE(review): presumably these are the sets used for patterns without the
// `u` flag — confirm against the consumer of this module.
exports.REGULAR = new Map([
// \d: the ASCII digits 0–9.
['d', regenerate()
.addRange(0x30, 0x39)],
// \D: everything in 0x0–0xFFFF except 0x30–0x39.
['D', regenerate()
.addRange(0x0, 0x2F)
.addRange(0x3A, 0xFFFF)],
// \s: seven individual code points (0x20, 0xA0, 0x1680, 0x202F, 0x205F,
// 0x3000, 0xFEFF) plus the ranges 0x9–0xD, 0x2000–0x200A and 0x2028–0x2029.
['s', regenerate(0x20, 0xA0, 0x1680, 0x202F, 0x205F, 0x3000, 0xFEFF)
.addRange(0x9, 0xD)
.addRange(0x2000, 0x200A)
.addRange(0x2028, 0x2029)],
// \S: the gaps between the `s` members, up to 0xFFFF. Note the final two
// ranges skip only 0xFEFF.
['S', regenerate()
.addRange(0x0, 0x8)
.addRange(0xE, 0x1F)
.addRange(0x21, 0x9F)
.addRange(0xA1, 0x167F)
.addRange(0x1681, 0x1FFF)
.addRange(0x200B, 0x2027)
.addRange(0x202A, 0x202E)
.addRange(0x2030, 0x205E)
.addRange(0x2060, 0x2FFF)
.addRange(0x3001, 0xFEFE)
.addRange(0xFF00, 0xFFFF)],
// \w: underscore (0x5F), digits, A–Z and a–z.
['w', regenerate(0x5F)
.addRange(0x30, 0x39)
.addRange(0x41, 0x5A)
.addRange(0x61, 0x7A)],
// \W: everything in 0x0–0xFFFF that `w` omits; 0x60 is the single code point
// between underscore (0x5F) and `a` (0x61).
['W', regenerate(0x60)
.addRange(0x0, 0x2F)
.addRange(0x3A, 0x40)
.addRange(0x5B, 0x5E)
.addRange(0x7B, 0xFFFF)]
]);
// UNICODE: a Map from the character class escape letters (d, D, s, S, w, W)
// to `regenerate` code point sets. The lowercase entries list the same code
// points as in REGULAR; the uppercase (negated) entries extend to 0x10FFFF
// instead of stopping at 0xFFFF.
// NOTE(review): presumably these are the sets used for patterns with the `u`
// flag but without `i` — confirm against the consumer of this module.
exports.UNICODE = new Map([
// \d: the ASCII digits 0–9.
['d', regenerate()
.addRange(0x30, 0x39)],
// \D: everything in 0x0–0x10FFFF except 0x30–0x39.
['D', regenerate()
.addRange(0x0, 0x2F)
.addRange(0x3A, 0x10FFFF)],
// \s: seven individual code points plus the ranges 0x9–0xD, 0x2000–0x200A
// and 0x2028–0x2029.
['s', regenerate(0x20, 0xA0, 0x1680, 0x202F, 0x205F, 0x3000, 0xFEFF)
.addRange(0x9, 0xD)
.addRange(0x2000, 0x200A)
.addRange(0x2028, 0x2029)],
// \S: the gaps between the `s` members, up to 0x10FFFF.
['S', regenerate()
.addRange(0x0, 0x8)
.addRange(0xE, 0x1F)
.addRange(0x21, 0x9F)
.addRange(0xA1, 0x167F)
.addRange(0x1681, 0x1FFF)
.addRange(0x200B, 0x2027)
.addRange(0x202A, 0x202E)
.addRange(0x2030, 0x205E)
.addRange(0x2060, 0x2FFF)
.addRange(0x3001, 0xFEFE)
.addRange(0xFF00, 0x10FFFF)],
// \w: underscore (0x5F), digits, A–Z and a–z.
['w', regenerate(0x5F)
.addRange(0x30, 0x39)
.addRange(0x41, 0x5A)
.addRange(0x61, 0x7A)],
// \W: everything in 0x0–0x10FFFF that `w` omits.
['W', regenerate(0x60)
.addRange(0x0, 0x2F)
.addRange(0x3A, 0x40)
.addRange(0x5B, 0x5E)
.addRange(0x7B, 0x10FFFF)]
]);
// UNICODE_IGNORE_CASE: a Map from the character class escape letters
// (d, D, s, S, w, W) to `regenerate` code point sets, bounded by 0x10FFFF.
// The d, D, s and S entries list the same code points as in UNICODE. Only the
// `w` and `W` entries differ: `w` additionally contains 0x17F and 0x212A, and
// `W` leaves those two code points out.
// NOTE(review): presumably these are the sets used for patterns with both the
// `i` and `u` flags — confirm against the consumer of this module.
exports.UNICODE_IGNORE_CASE = new Map([
// \d: the ASCII digits 0–9.
['d', regenerate()
.addRange(0x30, 0x39)],
// \D: everything in 0x0–0x10FFFF except 0x30–0x39.
['D', regenerate()
.addRange(0x0, 0x2F)
.addRange(0x3A, 0x10FFFF)],
// \s: seven individual code points plus the ranges 0x9–0xD, 0x2000–0x200A
// and 0x2028–0x2029.
['s', regenerate(0x20, 0xA0, 0x1680, 0x202F, 0x205F, 0x3000, 0xFEFF)
.addRange(0x9, 0xD)
.addRange(0x2000, 0x200A)
.addRange(0x2028, 0x2029)],
// \S: the gaps between the `s` members, up to 0x10FFFF.
['S', regenerate()
.addRange(0x0, 0x8)
.addRange(0xE, 0x1F)
.addRange(0x21, 0x9F)
.addRange(0xA1, 0x167F)
.addRange(0x1681, 0x1FFF)
.addRange(0x200B, 0x2027)
.addRange(0x202A, 0x202E)
.addRange(0x2030, 0x205E)
.addRange(0x2060, 0x2FFF)
.addRange(0x3001, 0xFEFE)
.addRange(0xFF00, 0x10FFFF)],
// \w: underscore, digits, A–Z, a–z, plus 0x17F and 0x212A — the code points
// that data/iu-mappings.js pairs with S/s (0x53, 0x73) and K/k (0x4B, 0x6B).
['w', regenerate(0x5F, 0x17F, 0x212A)
.addRange(0x30, 0x39)
.addRange(0x41, 0x5A)
.addRange(0x61, 0x7A)],
// \W: everything in 0x0–0x10FFFF that `w` omits; the last three ranges step
// around 0x17F and 0x212A.
['W', regenerate(0x60)
.addRange(0x0, 0x2F)
.addRange(0x3A, 0x40)
.addRange(0x5B, 0x5E)
.addRange(0x7B, 0x17E)
.addRange(0x180, 0x2129)
.addRange(0x212B, 0x10FFFF)]
]);
regexpu-core-4.8.0/data/iu-mappings.js 0000664 0000000 0000000 00000032070 14120103741 0017623 0 ustar 00root root 0000000 0000000 module.exports = new Map([
[0x4B, 0x212A],
[0x53, 0x17F],
[0x6B, 0x212A],
[0x73, 0x17F],
[0xB5, 0x39C],
[0xC5, 0x212B],
[0xDF, 0x1E9E],
[0xE5, 0x212B],
[0x17F, 0x53],
[0x1C4, 0x1C5],
[0x1C5, 0x1C4],
[0x1C7, 0x1C8],
[0x1C8, 0x1C7],
[0x1CA, 0x1CB],
[0x1CB, 0x1CA],
[0x1F1, 0x1F2],
[0x1F2, 0x1F1],
[0x345, 0x1FBE],
[0x392, 0x3D0],
[0x395, 0x3F5],
[0x398, 0x3F4],
[0x399, 0x1FBE],
[0x39A, 0x3F0],
[0x39C, 0xB5],
[0x3A0, 0x3D6],
[0x3A1, 0x3F1],
[0x3A3, 0x3C2],
[0x3A6, 0x3D5],
[0x3A9, 0x2126],
[0x3B8, 0x3F4],
[0x3C2, 0x3A3],
[0x3C9, 0x2126],
[0x3D0, 0x392],
[0x3D1, 0x3F4],
[0x3D5, 0x3A6],
[0x3D6, 0x3A0],
[0x3F0, 0x39A],
[0x3F1, 0x3A1],
[0x3F4, [
0x398,
0x3D1,
0x3B8
]],
[0x3F5, 0x395],
[0x412, 0x1C80],
[0x414, 0x1C81],
[0x41E, 0x1C82],
[0x421, 0x1C83],
[0x422, 0x1C85],
[0x42A, 0x1C86],
[0x462, 0x1C87],
[0x1C80, 0x412],
[0x1C81, 0x414],
[0x1C82, 0x41E],
[0x1C83, 0x421],
[0x1C84, 0x1C85],
[0x1C85, [
0x422,
0x1C84
]],
[0x1C86, 0x42A],
[0x1C87, 0x462],
[0x1C88, 0xA64A],
[0x1E60, 0x1E9B],
[0x1E9B, 0x1E60],
[0x1E9E, 0xDF],
[0x1F80, 0x1F88],
[0x1F81, 0x1F89],
[0x1F82, 0x1F8A],
[0x1F83, 0x1F8B],
[0x1F84, 0x1F8C],
[0x1F85, 0x1F8D],
[0x1F86, 0x1F8E],
[0x1F87, 0x1F8F],
[0x1F88, 0x1F80],
[0x1F89, 0x1F81],
[0x1F8A, 0x1F82],
[0x1F8B, 0x1F83],
[0x1F8C, 0x1F84],
[0x1F8D, 0x1F85],
[0x1F8E, 0x1F86],
[0x1F8F, 0x1F87],
[0x1F90, 0x1F98],
[0x1F91, 0x1F99],
[0x1F92, 0x1F9A],
[0x1F93, 0x1F9B],
[0x1F94, 0x1F9C],
[0x1F95, 0x1F9D],
[0x1F96, 0x1F9E],
[0x1F97, 0x1F9F],
[0x1F98, 0x1F90],
[0x1F99, 0x1F91],
[0x1F9A, 0x1F92],
[0x1F9B, 0x1F93],
[0x1F9C, 0x1F94],
[0x1F9D, 0x1F95],
[0x1F9E, 0x1F96],
[0x1F9F, 0x1F97],
[0x1FA0, 0x1FA8],
[0x1FA1, 0x1FA9],
[0x1FA2, 0x1FAA],
[0x1FA3, 0x1FAB],
[0x1FA4, 0x1FAC],
[0x1FA5, 0x1FAD],
[0x1FA6, 0x1FAE],
[0x1FA7, 0x1FAF],
[0x1FA8, 0x1FA0],
[0x1FA9, 0x1FA1],
[0x1FAA, 0x1FA2],
[0x1FAB, 0x1FA3],
[0x1FAC, 0x1FA4],
[0x1FAD, 0x1FA5],
[0x1FAE, 0x1FA6],
[0x1FAF, 0x1FA7],
[0x1FB3, 0x1FBC],
[0x1FBC, 0x1FB3],
[0x1FBE, [
0x345,
0x399
]],
[0x1FC3, 0x1FCC],
[0x1FCC, 0x1FC3],
[0x1FF3, 0x1FFC],
[0x1FFC, 0x1FF3],
[0x2126, [
0x3A9,
0x3C9
]],
[0x212A, 0x4B],
[0x212B, [
0xC5,
0xE5
]],
[0x2C2F, 0x2C5F],
[0x2C5F, 0x2C2F],
[0xA64A, 0x1C88],
[0xA7C0, 0xA7C1],
[0xA7C1, 0xA7C0],
[0xA7D0, 0xA7D1],
[0xA7D1, 0xA7D0],
[0xA7D6, 0xA7D7],
[0xA7D7, 0xA7D6],
[0xA7D8, 0xA7D9],
[0xA7D9, 0xA7D8],
[0x10400, 0x10428],
[0x10401, 0x10429],
[0x10402, 0x1042A],
[0x10403, 0x1042B],
[0x10404, 0x1042C],
[0x10405, 0x1042D],
[0x10406, 0x1042E],
[0x10407, 0x1042F],
[0x10408, 0x10430],
[0x10409, 0x10431],
[0x1040A, 0x10432],
[0x1040B, 0x10433],
[0x1040C, 0x10434],
[0x1040D, 0x10435],
[0x1040E, 0x10436],
[0x1040F, 0x10437],
[0x10410, 0x10438],
[0x10411, 0x10439],
[0x10412, 0x1043A],
[0x10413, 0x1043B],
[0x10414, 0x1043C],
[0x10415, 0x1043D],
[0x10416, 0x1043E],
[0x10417, 0x1043F],
[0x10418, 0x10440],
[0x10419, 0x10441],
[0x1041A, 0x10442],
[0x1041B, 0x10443],
[0x1041C, 0x10444],
[0x1041D, 0x10445],
[0x1041E, 0x10446],
[0x1041F, 0x10447],
[0x10420, 0x10448],
[0x10421, 0x10449],
[0x10422, 0x1044A],
[0x10423, 0x1044B],
[0x10424, 0x1044C],
[0x10425, 0x1044D],
[0x10426, 0x1044E],
[0x10427, 0x1044F],
[0x10428, 0x10400],
[0x10429, 0x10401],
[0x1042A, 0x10402],
[0x1042B, 0x10403],
[0x1042C, 0x10404],
[0x1042D, 0x10405],
[0x1042E, 0x10406],
[0x1042F, 0x10407],
[0x10430, 0x10408],
[0x10431, 0x10409],
[0x10432, 0x1040A],
[0x10433, 0x1040B],
[0x10434, 0x1040C],
[0x10435, 0x1040D],
[0x10436, 0x1040E],
[0x10437, 0x1040F],
[0x10438, 0x10410],
[0x10439, 0x10411],
[0x1043A, 0x10412],
[0x1043B, 0x10413],
[0x1043C, 0x10414],
[0x1043D, 0x10415],
[0x1043E, 0x10416],
[0x1043F, 0x10417],
[0x10440, 0x10418],
[0x10441, 0x10419],
[0x10442, 0x1041A],
[0x10443, 0x1041B],
[0x10444, 0x1041C],
[0x10445, 0x1041D],
[0x10446, 0x1041E],
[0x10447, 0x1041F],
[0x10448, 0x10420],
[0x10449, 0x10421],
[0x1044A, 0x10422],
[0x1044B, 0x10423],
[0x1044C, 0x10424],
[0x1044D, 0x10425],
[0x1044E, 0x10426],
[0x1044F, 0x10427],
[0x104B0, 0x104D8],
[0x104B1, 0x104D9],
[0x104B2, 0x104DA],
[0x104B3, 0x104DB],
[0x104B4, 0x104DC],
[0x104B5, 0x104DD],
[0x104B6, 0x104DE],
[0x104B7, 0x104DF],
[0x104B8, 0x104E0],
[0x104B9, 0x104E1],
[0x104BA, 0x104E2],
[0x104BB, 0x104E3],
[0x104BC, 0x104E4],
[0x104BD, 0x104E5],
[0x104BE, 0x104E6],
[0x104BF, 0x104E7],
[0x104C0, 0x104E8],
[0x104C1, 0x104E9],
[0x104C2, 0x104EA],
[0x104C3, 0x104EB],
[0x104C4, 0x104EC],
[0x104C5, 0x104ED],
[0x104C6, 0x104EE],
[0x104C7, 0x104EF],
[0x104C8, 0x104F0],
[0x104C9, 0x104F1],
[0x104CA, 0x104F2],
[0x104CB, 0x104F3],
[0x104CC, 0x104F4],
[0x104CD, 0x104F5],
[0x104CE, 0x104F6],
[0x104CF, 0x104F7],
[0x104D0, 0x104F8],
[0x104D1, 0x104F9],
[0x104D2, 0x104FA],
[0x104D3, 0x104FB],
[0x104D8, 0x104B0],
[0x104D9, 0x104B1],
[0x104DA, 0x104B2],
[0x104DB, 0x104B3],
[0x104DC, 0x104B4],
[0x104DD, 0x104B5],
[0x104DE, 0x104B6],
[0x104DF, 0x104B7],
[0x104E0, 0x104B8],
[0x104E1, 0x104B9],
[0x104E2, 0x104BA],
[0x104E3, 0x104BB],
[0x104E4, 0x104BC],
[0x104E5, 0x104BD],
[0x104E6, 0x104BE],
[0x104E7, 0x104BF],
[0x104E8, 0x104C0],
[0x104E9, 0x104C1],
[0x104EA, 0x104C2],
[0x104EB, 0x104C3],
[0x104EC, 0x104C4],
[0x104ED, 0x104C5],
[0x104EE, 0x104C6],
[0x104EF, 0x104C7],
[0x104F0, 0x104C8],
[0x104F1, 0x104C9],
[0x104F2, 0x104CA],
[0x104F3, 0x104CB],
[0x104F4, 0x104CC],
[0x104F5, 0x104CD],
[0x104F6, 0x104CE],
[0x104F7, 0x104CF],
[0x104F8, 0x104D0],
[0x104F9, 0x104D1],
[0x104FA, 0x104D2],
[0x104FB, 0x104D3],
[0x10570, 0x10597],
[0x10571, 0x10598],
[0x10572, 0x10599],
[0x10573, 0x1059A],
[0x10574, 0x1059B],
[0x10575, 0x1059C],
[0x10576, 0x1059D],
[0x10577, 0x1059E],
[0x10578, 0x1059F],
[0x10579, 0x105A0],
[0x1057A, 0x105A1],
[0x1057C, 0x105A3],
[0x1057D, 0x105A4],
[0x1057E, 0x105A5],
[0x1057F, 0x105A6],
[0x10580, 0x105A7],
[0x10581, 0x105A8],
[0x10582, 0x105A9],
[0x10583, 0x105AA],
[0x10584, 0x105AB],
[0x10585, 0x105AC],
[0x10586, 0x105AD],
[0x10587, 0x105AE],
[0x10588, 0x105AF],
[0x10589, 0x105B0],
[0x1058A, 0x105B1],
[0x1058C, 0x105B3],
[0x1058D, 0x105B4],
[0x1058E, 0x105B5],
[0x1058F, 0x105B6],
[0x10590, 0x105B7],
[0x10591, 0x105B8],
[0x10592, 0x105B9],
[0x10594, 0x105BB],
[0x10595, 0x105BC],
[0x10597, 0x10570],
[0x10598, 0x10571],
[0x10599, 0x10572],
[0x1059A, 0x10573],
[0x1059B, 0x10574],
[0x1059C, 0x10575],
[0x1059D, 0x10576],
[0x1059E, 0x10577],
[0x1059F, 0x10578],
[0x105A0, 0x10579],
[0x105A1, 0x1057A],
[0x105A3, 0x1057C],
[0x105A4, 0x1057D],
[0x105A5, 0x1057E],
[0x105A6, 0x1057F],
[0x105A7, 0x10580],
[0x105A8, 0x10581],
[0x105A9, 0x10582],
[0x105AA, 0x10583],
[0x105AB, 0x10584],
[0x105AC, 0x10585],
[0x105AD, 0x10586],
[0x105AE, 0x10587],
[0x105AF, 0x10588],
[0x105B0, 0x10589],
[0x105B1, 0x1058A],
[0x105B3, 0x1058C],
[0x105B4, 0x1058D],
[0x105B5, 0x1058E],
[0x105B6, 0x1058F],
[0x105B7, 0x10590],
[0x105B8, 0x10591],
[0x105B9, 0x10592],
[0x105BB, 0x10594],
[0x105BC, 0x10595],
[0x10C80, 0x10CC0],
[0x10C81, 0x10CC1],
[0x10C82, 0x10CC2],
[0x10C83, 0x10CC3],
[0x10C84, 0x10CC4],
[0x10C85, 0x10CC5],
[0x10C86, 0x10CC6],
[0x10C87, 0x10CC7],
[0x10C88, 0x10CC8],
[0x10C89, 0x10CC9],
[0x10C8A, 0x10CCA],
[0x10C8B, 0x10CCB],
[0x10C8C, 0x10CCC],
[0x10C8D, 0x10CCD],
[0x10C8E, 0x10CCE],
[0x10C8F, 0x10CCF],
[0x10C90, 0x10CD0],
[0x10C91, 0x10CD1],
[0x10C92, 0x10CD2],
[0x10C93, 0x10CD3],
[0x10C94, 0x10CD4],
[0x10C95, 0x10CD5],
[0x10C96, 0x10CD6],
[0x10C97, 0x10CD7],
[0x10C98, 0x10CD8],
[0x10C99, 0x10CD9],
[0x10C9A, 0x10CDA],
[0x10C9B, 0x10CDB],
[0x10C9C, 0x10CDC],
[0x10C9D, 0x10CDD],
[0x10C9E, 0x10CDE],
[0x10C9F, 0x10CDF],
[0x10CA0, 0x10CE0],
[0x10CA1, 0x10CE1],
[0x10CA2, 0x10CE2],
[0x10CA3, 0x10CE3],
[0x10CA4, 0x10CE4],
[0x10CA5, 0x10CE5],
[0x10CA6, 0x10CE6],
[0x10CA7, 0x10CE7],
[0x10CA8, 0x10CE8],
[0x10CA9, 0x10CE9],
[0x10CAA, 0x10CEA],
[0x10CAB, 0x10CEB],
[0x10CAC, 0x10CEC],
[0x10CAD, 0x10CED],
[0x10CAE, 0x10CEE],
[0x10CAF, 0x10CEF],
[0x10CB0, 0x10CF0],
[0x10CB1, 0x10CF1],
[0x10CB2, 0x10CF2],
[0x10CC0, 0x10C80],
[0x10CC1, 0x10C81],
[0x10CC2, 0x10C82],
[0x10CC3, 0x10C83],
[0x10CC4, 0x10C84],
[0x10CC5, 0x10C85],
[0x10CC6, 0x10C86],
[0x10CC7, 0x10C87],
[0x10CC8, 0x10C88],
[0x10CC9, 0x10C89],
[0x10CCA, 0x10C8A],
[0x10CCB, 0x10C8B],
[0x10CCC, 0x10C8C],
[0x10CCD, 0x10C8D],
[0x10CCE, 0x10C8E],
[0x10CCF, 0x10C8F],
[0x10CD0, 0x10C90],
[0x10CD1, 0x10C91],
[0x10CD2, 0x10C92],
[0x10CD3, 0x10C93],
[0x10CD4, 0x10C94],
[0x10CD5, 0x10C95],
[0x10CD6, 0x10C96],
[0x10CD7, 0x10C97],
[0x10CD8, 0x10C98],
[0x10CD9, 0x10C99],
[0x10CDA, 0x10C9A],
[0x10CDB, 0x10C9B],
[0x10CDC, 0x10C9C],
[0x10CDD, 0x10C9D],
[0x10CDE, 0x10C9E],
[0x10CDF, 0x10C9F],
[0x10CE0, 0x10CA0],
[0x10CE1, 0x10CA1],
[0x10CE2, 0x10CA2],
[0x10CE3, 0x10CA3],
[0x10CE4, 0x10CA4],
[0x10CE5, 0x10CA5],
[0x10CE6, 0x10CA6],
[0x10CE7, 0x10CA7],
[0x10CE8, 0x10CA8],
[0x10CE9, 0x10CA9],
[0x10CEA, 0x10CAA],
[0x10CEB, 0x10CAB],
[0x10CEC, 0x10CAC],
[0x10CED, 0x10CAD],
[0x10CEE, 0x10CAE],
[0x10CEF, 0x10CAF],
[0x10CF0, 0x10CB0],
[0x10CF1, 0x10CB1],
[0x10CF2, 0x10CB2],
[0x118A0, 0x118C0],
[0x118A1, 0x118C1],
[0x118A2, 0x118C2],
[0x118A3, 0x118C3],
[0x118A4, 0x118C4],
[0x118A5, 0x118C5],
[0x118A6, 0x118C6],
[0x118A7, 0x118C7],
[0x118A8, 0x118C8],
[0x118A9, 0x118C9],
[0x118AA, 0x118CA],
[0x118AB, 0x118CB],
[0x118AC, 0x118CC],
[0x118AD, 0x118CD],
[0x118AE, 0x118CE],
[0x118AF, 0x118CF],
[0x118B0, 0x118D0],
[0x118B1, 0x118D1],
[0x118B2, 0x118D2],
[0x118B3, 0x118D3],
[0x118B4, 0x118D4],
[0x118B5, 0x118D5],
[0x118B6, 0x118D6],
[0x118B7, 0x118D7],
[0x118B8, 0x118D8],
[0x118B9, 0x118D9],
[0x118BA, 0x118DA],
[0x118BB, 0x118DB],
[0x118BC, 0x118DC],
[0x118BD, 0x118DD],
[0x118BE, 0x118DE],
[0x118BF, 0x118DF],
[0x118C0, 0x118A0],
[0x118C1, 0x118A1],
[0x118C2, 0x118A2],
[0x118C3, 0x118A3],
[0x118C4, 0x118A4],
[0x118C5, 0x118A5],
[0x118C6, 0x118A6],
[0x118C7, 0x118A7],
[0x118C8, 0x118A8],
[0x118C9, 0x118A9],
[0x118CA, 0x118AA],
[0x118CB, 0x118AB],
[0x118CC, 0x118AC],
[0x118CD, 0x118AD],
[0x118CE, 0x118AE],
[0x118CF, 0x118AF],
[0x118D0, 0x118B0],
[0x118D1, 0x118B1],
[0x118D2, 0x118B2],
[0x118D3, 0x118B3],
[0x118D4, 0x118B4],
[0x118D5, 0x118B5],
[0x118D6, 0x118B6],
[0x118D7, 0x118B7],
[0x118D8, 0x118B8],
[0x118D9, 0x118B9],
[0x118DA, 0x118BA],
[0x118DB, 0x118BB],
[0x118DC, 0x118BC],
[0x118DD, 0x118BD],
[0x118DE, 0x118BE],
[0x118DF, 0x118BF],
[0x16E40, 0x16E60],
[0x16E41, 0x16E61],
[0x16E42, 0x16E62],
[0x16E43, 0x16E63],
[0x16E44, 0x16E64],
[0x16E45, 0x16E65],
[0x16E46, 0x16E66],
[0x16E47, 0x16E67],
[0x16E48, 0x16E68],
[0x16E49, 0x16E69],
[0x16E4A, 0x16E6A],
[0x16E4B, 0x16E6B],
[0x16E4C, 0x16E6C],
[0x16E4D, 0x16E6D],
[0x16E4E, 0x16E6E],
[0x16E4F, 0x16E6F],
[0x16E50, 0x16E70],
[0x16E51, 0x16E71],
[0x16E52, 0x16E72],
[0x16E53, 0x16E73],
[0x16E54, 0x16E74],
[0x16E55, 0x16E75],
[0x16E56, 0x16E76],
[0x16E57, 0x16E77],
[0x16E58, 0x16E78],
[0x16E59, 0x16E79],
[0x16E5A, 0x16E7A],
[0x16E5B, 0x16E7B],
[0x16E5C, 0x16E7C],
[0x16E5D, 0x16E7D],
[0x16E5E, 0x16E7E],
[0x16E5F, 0x16E7F],
[0x16E60, 0x16E40],
[0x16E61, 0x16E41],
[0x16E62, 0x16E42],
[0x16E63, 0x16E43],
[0x16E64, 0x16E44],
[0x16E65, 0x16E45],
[0x16E66, 0x16E46],
[0x16E67, 0x16E47],
[0x16E68, 0x16E48],
[0x16E69, 0x16E49],
[0x16E6A, 0x16E4A],
[0x16E6B, 0x16E4B],
[0x16E6C, 0x16E4C],
[0x16E6D, 0x16E4D],
[0x16E6E, 0x16E4E],
[0x16E6F, 0x16E4F],
[0x16E70, 0x16E50],
[0x16E71, 0x16E51],
[0x16E72, 0x16E52],
[0x16E73, 0x16E53],
[0x16E74, 0x16E54],
[0x16E75, 0x16E55],
[0x16E76, 0x16E56],
[0x16E77, 0x16E57],
[0x16E78, 0x16E58],
[0x16E79, 0x16E59],
[0x16E7A, 0x16E5A],
[0x16E7B, 0x16E5B],
[0x16E7C, 0x16E5C],
[0x16E7D, 0x16E5D],
[0x16E7E, 0x16E5E],
[0x16E7F, 0x16E5F],
[0x1E900, 0x1E922],
[0x1E901, 0x1E923],
[0x1E902, 0x1E924],
[0x1E903, 0x1E925],
[0x1E904, 0x1E926],
[0x1E905, 0x1E927],
[0x1E906, 0x1E928],
[0x1E907, 0x1E929],
[0x1E908, 0x1E92A],
[0x1E909, 0x1E92B],
[0x1E90A, 0x1E92C],
[0x1E90B, 0x1E92D],
[0x1E90C, 0x1E92E],
[0x1E90D, 0x1E92F],
[0x1E90E, 0x1E930],
[0x1E90F, 0x1E931],
[0x1E910, 0x1E932],
[0x1E911, 0x1E933],
[0x1E912, 0x1E934],
[0x1E913, 0x1E935],
[0x1E914, 0x1E936],
[0x1E915, 0x1E937],
[0x1E916, 0x1E938],
[0x1E917, 0x1E939],
[0x1E918, 0x1E93A],
[0x1E919, 0x1E93B],
[0x1E91A, 0x1E93C],
[0x1E91B, 0x1E93D],
[0x1E91C, 0x1E93E],
[0x1E91D, 0x1E93F],
[0x1E91E, 0x1E940],
[0x1E91F, 0x1E941],
[0x1E920, 0x1E942],
[0x1E921, 0x1E943],
[0x1E922, 0x1E900],
[0x1E923, 0x1E901],
[0x1E924, 0x1E902],
[0x1E925, 0x1E903],
[0x1E926, 0x1E904],
[0x1E927, 0x1E905],
[0x1E928, 0x1E906],
[0x1E929, 0x1E907],
[0x1E92A, 0x1E908],
[0x1E92B, 0x1E909],
[0x1E92C, 0x1E90A],
[0x1E92D, 0x1E90B],
[0x1E92E, 0x1E90C],
[0x1E92F, 0x1E90D],
[0x1E930, 0x1E90E],
[0x1E931, 0x1E90F],
[0x1E932, 0x1E910],
[0x1E933, 0x1E911],
[0x1E934, 0x1E912],
[0x1E935, 0x1E913],
[0x1E936, 0x1E914],
[0x1E937, 0x1E915],
[0x1E938, 0x1E916],
[0x1E939, 0x1E917],
[0x1E93A, 0x1E918],
[0x1E93B, 0x1E919],
[0x1E93C, 0x1E91A],
[0x1E93D, 0x1E91B],
[0x1E93E, 0x1E91C],
[0x1E93F, 0x1E91D],
[0x1E940, 0x1E91E],
[0x1E941, 0x1E91F],
[0x1E942, 0x1E920],
[0x1E943, 0x1E921]
]);
regexpu-core-4.8.0/demo.js 0000664 0000000 0000000 00000000753 14120103741 0015410 0 ustar 00root root 0000000 0000000 'use strict';
const rewritePattern = require('./rewrite-pattern.js');
const parse = require('regjsparser').parse;
const generate = require('regjsgen').generate;
const regenerate = require('regenerate');
// Demo: feed a lone hyphen through the parser/generator, through `regenerate`,
// and through `rewritePattern` with `useUnicodeFlag`, printing each result.
const hyphenSource = String.raw`-`;

// Round-trip the pattern through regjsparser and regjsgen.
const hyphenAst = parse(hyphenSource);
console.log(generate(hyphenAst));

// Show how `regenerate` serializes a set holding only the hyphen's code point.
const hyphenSet = regenerate('-'.codePointAt(0));
console.log(hyphenSet.toString());

// Rewrite the same pattern for a `u`-flag target and print the output.
const rewrittenSource = rewritePattern(hyphenSource, 'u', { useUnicodeFlag: true });
console.log(rewrittenSource);

// throws
new RegExp(rewrittenSource, 'u');
regexpu-core-4.8.0/package.json 0000664 0000000 0000000 00000003157 14120103741 0016415 0 ustar 00root root 0000000 0000000 {
"name": "regexpu-core",
"version": "4.8.0",
"description": "regexpu’s core functionality (i.e. `rewritePattern(pattern, flag)`), capable of translating ES6 Unicode regular expressions to ES5.",
"homepage": "https://mths.be/regexpu",
"main": "rewrite-pattern.js",
"engines": {
"node": ">=4"
},
"keywords": [
"codegen",
"desugaring",
"ecmascript",
"es5",
"es6",
"harmony",
"javascript",
"refactoring",
"regex",
"regexp",
"regular expressions",
"rewriting",
"syntax",
"transformation",
"transpile",
"transpiler",
"unicode"
],
"license": "MIT",
"author": {
"name": "Mathias Bynens",
"url": "https://mathiasbynens.be/"
},
"repository": {
"type": "git",
"url": "https://github.com/mathiasbynens/regexpu-core.git"
},
"bugs": "https://github.com/mathiasbynens/regexpu-core/issues",
"files": [
"LICENSE-MIT.txt",
"rewrite-pattern.js",
"data/character-class-escape-sets.js",
"data/iu-mappings.js"
],
"scripts": {
"build": "node scripts/iu-mappings.js && node scripts/character-class-escape-sets.js",
"test": "mocha tests",
"cover": "istanbul cover --report html node_modules/.bin/_mocha tests -- -u exports -R spec"
},
"dependencies": {
"regenerate": "^1.4.2",
"regenerate-unicode-properties": "^9.0.0",
"regjsgen": "^0.5.2",
"regjsparser": "^0.7.0",
"unicode-match-property-ecmascript": "^2.0.0",
"unicode-match-property-value-ecmascript": "^2.0.0"
},
"devDependencies": {
"codecov": "^3.8.3",
"istanbul": "^0.4.5",
"jsesc": "^3.0.2",
"lodash": "^4.17.21",
"mocha": "^9.1.1",
"regexpu-fixtures": "2.1.4",
"@unicode/unicode-14.0.0": "^1.2.1"
}
}
regexpu-core-4.8.0/property-escapes.md 0000664 0000000 0000000 00000022070 14120103741 0017751 0 ustar 00root root 0000000 0000000 # Unicode property escapes in _regexpu_
To opt in to experimental support for [Unicode property escapes](https://github.com/mathiasbynens/es-regexp-unicode-property-escapes), enable [the `unicodePropertyEscape` option](README.md#unicodepropertyescape-default-false).
```js
rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', {
'unicodePropertyEscape': true
});
// → '(?:\\uD811[\\uDC00-\\uDE46])'
```
If you’re targeting ES2015 environments exclusively, consider enabling [the `useUnicodeFlag` option](README.md#useunicodeflag-default-false) for simpler (but not necessarily more compact) output.
```js
rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', {
'unicodePropertyEscape': true,
'useUnicodeFlag': true
});
// → '[\\u{14400}-\\u{14646}]'
```
[An online demo is available.](https://mothereff.in/regexpu#input=var+regex+%3D+/%5Cp%7BScript_Extensions%3DGreek%7D/u%3B&unicodePropertyEscape=1)
Note that this feature is non-standard. This implementation may or may not reflect what eventually gets specified.
What follows is an exhaustive overview of the Unicode properties and values that _regexpu_ supports in `\p{…}` and `\P{…}` expressions in regular expressions with the `u` flag.
## Non-binary properties
### `General_Category`
Possible values:
```sh
$ node -e 'require("regenerate-unicode-properties").get("General_Category").forEach(c => { console.log(`\\p{${c}}`); })'
\p{Cased_Letter}
\p{Close_Punctuation}
\p{Connector_Punctuation}
\p{Control}
\p{Currency_Symbol}
\p{Dash_Punctuation}
\p{Decimal_Number}
\p{Enclosing_Mark}
\p{Final_Punctuation}
\p{Format}
\p{Initial_Punctuation}
\p{Letter}
\p{Letter_Number}
\p{Line_Separator}
\p{Lowercase_Letter}
\p{Mark}
\p{Math_Symbol}
\p{Modifier_Letter}
\p{Modifier_Symbol}
\p{Nonspacing_Mark}
\p{Number}
\p{Open_Punctuation}
\p{Other}
\p{Other_Letter}
\p{Other_Number}
\p{Other_Punctuation}
\p{Other_Symbol}
\p{Paragraph_Separator}
\p{Private_Use}
\p{Punctuation}
\p{Separator}
\p{Space_Separator}
\p{Spacing_Mark}
\p{Surrogate}
\p{Symbol}
\p{Titlecase_Letter}
\p{Unassigned}
\p{Uppercase_Letter}
```
Note that the `General_Category=` prefix may be used, e.g. `\p{General_Category=Cased_Letter}`.
Category aliases may be used, e.g. `\p{Lc}` or `\p{General_Category=Lc}`, although IMHO it’s more readable to stick to the canonical category names listed above.
### `Script` & `Script_Extensions`
The sets of possible values for `Script` and `Script_Extensions` are identical:
```sh
$ node -e 'require("regenerate-unicode-properties").get("Script_Extensions").forEach(s => { console.log(`\\p{Script_Extensions=${s}}`); })'
\p{Script_Extensions=Adlam}
\p{Script_Extensions=Ahom}
\p{Script_Extensions=Anatolian_Hieroglyphs}
\p{Script_Extensions=Arabic}
\p{Script_Extensions=Armenian}
\p{Script_Extensions=Avestan}
\p{Script_Extensions=Balinese}
\p{Script_Extensions=Bamum}
\p{Script_Extensions=Bassa_Vah}
\p{Script_Extensions=Batak}
\p{Script_Extensions=Bengali}
\p{Script_Extensions=Bhaiksuki}
\p{Script_Extensions=Bopomofo}
\p{Script_Extensions=Brahmi}
\p{Script_Extensions=Braille}
\p{Script_Extensions=Buginese}
\p{Script_Extensions=Buhid}
\p{Script_Extensions=Canadian_Aboriginal}
\p{Script_Extensions=Carian}
\p{Script_Extensions=Caucasian_Albanian}
\p{Script_Extensions=Chakma}
\p{Script_Extensions=Cham}
\p{Script_Extensions=Cherokee}
\p{Script_Extensions=Chorasmian}
\p{Script_Extensions=Common}
\p{Script_Extensions=Coptic}
\p{Script_Extensions=Cuneiform}
\p{Script_Extensions=Cypriot}
\p{Script_Extensions=Cypro_Minoan}
\p{Script_Extensions=Cyrillic}
\p{Script_Extensions=Deseret}
\p{Script_Extensions=Devanagari}
\p{Script_Extensions=Dives_Akuru}
\p{Script_Extensions=Dogra}
\p{Script_Extensions=Duployan}
\p{Script_Extensions=Egyptian_Hieroglyphs}
\p{Script_Extensions=Elbasan}
\p{Script_Extensions=Elymaic}
\p{Script_Extensions=Ethiopic}
\p{Script_Extensions=Georgian}
\p{Script_Extensions=Glagolitic}
\p{Script_Extensions=Gothic}
\p{Script_Extensions=Grantha}
\p{Script_Extensions=Greek}
\p{Script_Extensions=Gujarati}
\p{Script_Extensions=Gunjala_Gondi}
\p{Script_Extensions=Gurmukhi}
\p{Script_Extensions=Han}
\p{Script_Extensions=Hangul}
\p{Script_Extensions=Hanifi_Rohingya}
\p{Script_Extensions=Hanunoo}
\p{Script_Extensions=Hatran}
\p{Script_Extensions=Hebrew}
\p{Script_Extensions=Hiragana}
\p{Script_Extensions=Imperial_Aramaic}
\p{Script_Extensions=Inherited}
\p{Script_Extensions=Inscriptional_Pahlavi}
\p{Script_Extensions=Inscriptional_Parthian}
\p{Script_Extensions=Javanese}
\p{Script_Extensions=Kaithi}
\p{Script_Extensions=Kannada}
\p{Script_Extensions=Katakana}
\p{Script_Extensions=Kayah_Li}
\p{Script_Extensions=Kharoshthi}
\p{Script_Extensions=Khitan_Small_Script}
\p{Script_Extensions=Khmer}
\p{Script_Extensions=Khojki}
\p{Script_Extensions=Khudawadi}
\p{Script_Extensions=Lao}
\p{Script_Extensions=Latin}
\p{Script_Extensions=Lepcha}
\p{Script_Extensions=Limbu}
\p{Script_Extensions=Linear_A}
\p{Script_Extensions=Linear_B}
\p{Script_Extensions=Lisu}
\p{Script_Extensions=Lycian}
\p{Script_Extensions=Lydian}
\p{Script_Extensions=Mahajani}
\p{Script_Extensions=Makasar}
\p{Script_Extensions=Malayalam}
\p{Script_Extensions=Mandaic}
\p{Script_Extensions=Manichaean}
\p{Script_Extensions=Marchen}
\p{Script_Extensions=Masaram_Gondi}
\p{Script_Extensions=Medefaidrin}
\p{Script_Extensions=Meetei_Mayek}
\p{Script_Extensions=Mende_Kikakui}
\p{Script_Extensions=Meroitic_Cursive}
\p{Script_Extensions=Meroitic_Hieroglyphs}
\p{Script_Extensions=Miao}
\p{Script_Extensions=Modi}
\p{Script_Extensions=Mongolian}
\p{Script_Extensions=Mro}
\p{Script_Extensions=Multani}
\p{Script_Extensions=Myanmar}
\p{Script_Extensions=Nabataean}
\p{Script_Extensions=Nandinagari}
\p{Script_Extensions=New_Tai_Lue}
\p{Script_Extensions=Newa}
\p{Script_Extensions=Nko}
\p{Script_Extensions=Nushu}
\p{Script_Extensions=Nyiakeng_Puachue_Hmong}
\p{Script_Extensions=Ogham}
\p{Script_Extensions=Ol_Chiki}
\p{Script_Extensions=Old_Hungarian}
\p{Script_Extensions=Old_Italic}
\p{Script_Extensions=Old_North_Arabian}
\p{Script_Extensions=Old_Permic}
\p{Script_Extensions=Old_Persian}
\p{Script_Extensions=Old_Sogdian}
\p{Script_Extensions=Old_South_Arabian}
\p{Script_Extensions=Old_Turkic}
\p{Script_Extensions=Old_Uyghur}
\p{Script_Extensions=Oriya}
\p{Script_Extensions=Osage}
\p{Script_Extensions=Osmanya}
\p{Script_Extensions=Pahawh_Hmong}
\p{Script_Extensions=Palmyrene}
\p{Script_Extensions=Pau_Cin_Hau}
\p{Script_Extensions=Phags_Pa}
\p{Script_Extensions=Phoenician}
\p{Script_Extensions=Psalter_Pahlavi}
\p{Script_Extensions=Rejang}
\p{Script_Extensions=Runic}
\p{Script_Extensions=Samaritan}
\p{Script_Extensions=Saurashtra}
\p{Script_Extensions=Sharada}
\p{Script_Extensions=Shavian}
\p{Script_Extensions=Siddham}
\p{Script_Extensions=SignWriting}
\p{Script_Extensions=Sinhala}
\p{Script_Extensions=Sogdian}
\p{Script_Extensions=Sora_Sompeng}
\p{Script_Extensions=Soyombo}
\p{Script_Extensions=Sundanese}
\p{Script_Extensions=Syloti_Nagri}
\p{Script_Extensions=Syriac}
\p{Script_Extensions=Tagalog}
\p{Script_Extensions=Tagbanwa}
\p{Script_Extensions=Tai_Le}
\p{Script_Extensions=Tai_Tham}
\p{Script_Extensions=Tai_Viet}
\p{Script_Extensions=Takri}
\p{Script_Extensions=Tamil}
\p{Script_Extensions=Tangsa}
\p{Script_Extensions=Tangut}
\p{Script_Extensions=Telugu}
\p{Script_Extensions=Thaana}
\p{Script_Extensions=Thai}
\p{Script_Extensions=Tibetan}
\p{Script_Extensions=Tifinagh}
\p{Script_Extensions=Tirhuta}
\p{Script_Extensions=Toto}
\p{Script_Extensions=Ugaritic}
\p{Script_Extensions=Vai}
\p{Script_Extensions=Vithkuqi}
\p{Script_Extensions=Wancho}
\p{Script_Extensions=Warang_Citi}
\p{Script_Extensions=Yezidi}
\p{Script_Extensions=Yi}
\p{Script_Extensions=Zanabazar_Square}
```
Note that script name aliases may be used as well, e.g. `\p{Script_Extensions=Aghb}`, although IMHO it’s more readable to stick to the canonical script names listed above.
## Binary properties
The following binary properties are supported:
```sh
$ node -e 'require("regenerate-unicode-properties").get("Binary_Property").forEach(p => { console.log(`\\p{${p}}`); })'
\p{ASCII}
\p{ASCII_Hex_Digit}
\p{Alphabetic}
\p{Any}
\p{Assigned}
\p{Bidi_Control}
\p{Bidi_Mirrored}
\p{Case_Ignorable}
\p{Cased}
\p{Changes_When_Casefolded}
\p{Changes_When_Casemapped}
\p{Changes_When_Lowercased}
\p{Changes_When_NFKC_Casefolded}
\p{Changes_When_Titlecased}
\p{Changes_When_Uppercased}
\p{Dash}
\p{Default_Ignorable_Code_Point}
\p{Deprecated}
\p{Diacritic}
\p{Emoji}
\p{Emoji_Component}
\p{Emoji_Modifier}
\p{Emoji_Modifier_Base}
\p{Emoji_Presentation}
\p{Extended_Pictographic}
\p{Extender}
\p{Grapheme_Base}
\p{Grapheme_Extend}
\p{Hex_Digit}
\p{IDS_Binary_Operator}
\p{IDS_Trinary_Operator}
\p{ID_Continue}
\p{ID_Start}
\p{Ideographic}
\p{Join_Control}
\p{Logical_Order_Exception}
\p{Lowercase}
\p{Math}
\p{Noncharacter_Code_Point}
\p{Pattern_Syntax}
\p{Pattern_White_Space}
\p{Quotation_Mark}
\p{Radical}
\p{Regional_Indicator}
\p{Sentence_Terminal}
\p{Soft_Dotted}
\p{Terminal_Punctuation}
\p{Unified_Ideograph}
\p{Uppercase}
\p{Variation_Selector}
\p{White_Space}
\p{XID_Continue}
\p{XID_Start}
```
Note that property name aliases may be used as well, e.g. `\p{AHex}`, although IMHO it’s more readable to stick to the canonical property names listed above.
regexpu-core-4.8.0/rewrite-pattern.js 0000664 0000000 0000000 00000023553 14120103741 0017623 0 ustar 00root root 0000000 0000000 'use strict';
const generate = require('regjsgen').generate;
const parse = require('regjsparser').parse;
const regenerate = require('regenerate');
const unicodeMatchProperty = require('unicode-match-property-ecmascript');
const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
const iuMappings = require('./data/iu-mappings.js');
const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
// Regenerate set holding every Unicode code point (U+0000 through U+10FFFF).
// Negated property escapes are derived by removing members from a clone of it.
const UNICODE_SET = regenerate().addRange(0x000000, 0x10FFFF);
// Regenerate set limited to the Basic Multilingual Plane: without the `u`
// flag, the range stops at 0xFFFF.
// https://mths.be/es6#sec-pattern-semantics
const BMP_SET = regenerate().addRange(0x0000, 0xFFFF);
// Regenerate set of all code points that are supposed to be matched by
// `/./u` (https://mths.be/es6#sec-atom): every Unicode code point minus the
// `LineTerminator`s (https://mths.be/es6#sec-line-terminators).
const DOT_SET_UNICODE = UNICODE_SET.clone().remove([
	0x000A, // Line Feed
	0x000D, // Carriage Return
	0x2028, // Line Separator
	0x2029 // Paragraph Separator
]);
// Look up the precomputed set for a character class escape.
// `character` is the key into the escape-set maps; `unicode` and `ignoreCase`
// select which of the three precomputed maps is consulted:
//   - no `unicode`             → `ESCAPE_SETS.REGULAR`
//   - `unicode` only           → `ESCAPE_SETS.UNICODE`
//   - `unicode` + `ignoreCase` → `ESCAPE_SETS.UNICODE_IGNORE_CASE`
const getCharacterClassEscapeSet = (character, unicode, ignoreCase) => {
	let sets = ESCAPE_SETS.REGULAR;
	if (unicode) {
		sets = ignoreCase ?
			ESCAPE_SETS.UNICODE_IGNORE_CASE :
			ESCAPE_SETS.UNICODE;
	}
	return sets.get(character);
};
// Return the set used to rewrite `.` in Unicode mode: every code point when
// `dotAll` is truthy, otherwise every code point except line terminators.
const getUnicodeDotSet = (dotAll) => {
	if (dotAll) {
		return UNICODE_SET;
	}
	return DOT_SET_UNICODE;
};
// Load the precomputed set for a Unicode property from the
// `regenerate-unicode-properties` package.
// With a truthy `value` the module `<property>/<value>.js` is loaded;
// otherwise `property` is treated as a binary property and
// `Binary_Property/<property>.js` is loaded.
// Throws an `Error` when the module cannot be loaded for any reason.
const getUnicodePropertyValueSet = (property, value) => {
	let path;
	if (value) {
		path = `${ property }/${ value }`;
	} else {
		path = `Binary_Property/${ property }`;
	}
	try {
		return require(`regenerate-unicode-properties/${ path }.js`);
	} catch (exception) {
		// Any failure while loading is reported as an unrecognized value.
		const message =
			`Failed to recognize value \`${ value }\` for property \`${ property }\`.`;
		throw new Error(message);
	}
};
// Resolve the contents of `\p{…}` / `\P{…}` when no `=` is present.
// The lone name is tried as a `General_Category` value first and as a binary
// property second; the set for whichever interpretation succeeds is returned.
const handleLoneUnicodePropertyNameOrValue = (value) => {
	try {
		// Note: `unicodeMatchPropertyValue` throws on invalid values.
		return getUnicodePropertyValueSet(
			'General_Category',
			unicodeMatchPropertyValue('General_Category', value)
		);
	} catch (exception) {
		// Deliberately ignored: it is not a `General_Category` value, so fall
		// through to the binary property lookup below.
	}
	// Note: `unicodeMatchProperty` throws on invalid properties.
	return getUnicodePropertyValueSet(unicodeMatchProperty(value));
};
// Build the set matched by a Unicode property escape.
// `value` is the text between the braces, either a lone name/value or
// `Property=Value`. When `isNegative` is truthy the complement with respect
// to all Unicode code points is returned. The result is always a fresh set,
// so callers may mutate it.
const getUnicodePropertyEscapeSet = (value, isNegative) => {
	const [nameOrValue, ...rest] = value.split('=');
	let set;
	if (rest.length === 0) {
		set = handleLoneUnicodePropertyNameOrValue(nameOrValue);
	} else {
		// The pattern consists of two parts, i.e. `Property=Value`.
		const property = unicodeMatchProperty(nameOrValue);
		const propertyValue = unicodeMatchPropertyValue(property, rest[0]);
		set = getUnicodePropertyValueSet(property, propertyValue);
	}
	return isNegative ?
		UNICODE_SET.clone().remove(set) :
		set.clone();
};
// Regenerate plugin: for every code point from `min` through `max`, add its
// case-folded counterpart(s), if any, to this set. The code points of the
// range itself are not added here. Returns the set for chaining.
regenerate.prototype.iuAddRange = function(min, max) {
	let codePoint = min;
	// The body runs at least once, even when `min` is greater than `max`.
	do {
		const folded = caseFold(codePoint);
		if (folded) {
			this.add(folded);
		}
		codePoint++;
	} while (codePoint <= max);
	return this;
};
// Replace the contents of AST node `item` in place with the parse tree of
// `pattern`. The pattern is parsed with the `u` flag only when
// `config.useUnicodeFlag` is set. Unless the parsed tree is a single
// character class, group, or value, it is wrapped in a non-capturing group.
const update = (item, pattern) => {
	const parsed = parse(pattern, config.useUnicodeFlag ? 'u' : '');
	const needsNoWrapping =
		parsed.type === 'characterClass' ||
		parsed.type === 'group' ||
		parsed.type === 'value';
	Object.assign(item, needsNoWrapping ? parsed : wrap(parsed, pattern));
};
// Build a non-capturing group node (`(?:…)`) whose only child is `tree` and
// whose raw source is `pattern` surrounded by `(?:` and `)`.
const wrap = (tree, pattern) => {
	const group = {
		'type': 'group',
		'behavior': 'ignore',
		'body': [tree],
		'raw': `(?:${ pattern })`
	};
	return group;
};
// Return the entry stored in `iuMappings` for `codePoint`, or `false` when
// there is no (truthy) entry.
const caseFold = (codePoint) => {
	const mapped = iuMappings.get(codePoint);
	return mapped ? mapped : false;
};
// Rewrite a character class node in place.
// All members of the class are collected into one Regenerate set, which is
// serialized with `regenerateOptions` and parsed back into
// `characterClassItem` via `update`. Returns the same (mutated) node.
// Throws an `Error` on an unknown member type.
const processCharacterClass = (characterClassItem, regenerateOptions) => {
	const set = regenerate();
	// Case-folded counterparts only have to be added explicitly for `iu`
	// patterns whose output does not keep the `u` flag.
	const shouldCaseFold =
		config.ignoreCase && config.unicode && !config.useUnicodeFlag;
	for (const item of characterClassItem.body) {
		switch (item.type) {
			case 'value': {
				set.add(item.codePoint);
				if (shouldCaseFold) {
					const folded = caseFold(item.codePoint);
					if (folded) {
						set.add(folded);
					}
				}
				break;
			}
			case 'characterClassRange': {
				const min = item.min.codePoint;
				const max = item.max.codePoint;
				set.addRange(min, max);
				if (shouldCaseFold) {
					set.iuAddRange(min, max);
				}
				break;
			}
			case 'characterClassEscape': {
				set.add(getCharacterClassEscapeSet(
					item.value,
					config.unicode,
					config.ignoreCase
				));
				break;
			}
			case 'unicodePropertyEscape': {
				set.add(getUnicodePropertyEscapeSet(item.value, item.negative));
				break;
			}
			// The `default` clause is only here as a safeguard; it should never be
			// reached. Code coverage tools should ignore it.
			/* istanbul ignore next */
			default:
				throw new Error(`Unknown term type: ${ item.type }`);
		}
	}
	const source = set.toString(regenerateOptions);
	if (characterClassItem.negative) {
		// A negated class becomes "not followed by the set, then any one item".
		// NOTE(review): `[\s\S]` consumes a single UTF-16 code unit unless the
		// output runs with the `u` flag; confirm astral symbols are handled as
		// intended when `unicode` is set without `useUnicodeFlag`.
		update(characterClassItem, `(?!${source})[\\s\\S]`);
	} else {
		update(characterClassItem, source);
	}
	return characterClassItem;
};
// Turn a named back-reference node into a numbered one, in place: the `name`
// property is dropped and `matchIndex` is set to `index`.
const updateNamedReference = (item, index) => {
	Reflect.deleteProperty(item, 'name');
	Object.assign(item, { 'matchIndex': index });
};
// Throw an `Error` listing every name still present in
// `groups.unmatchedReferences`; do nothing when there are none.
const assertNoUnmatchedReferences = (groups) => {
	const pendingNames = Object.keys(groups.unmatchedReferences);
	if (pendingNames.length === 0) {
		return;
	}
	throw new Error(`Unknown group names: ${pendingNames}`);
};
// Recursively rewrite one AST node (and its children) in place.
//   item              - the node to process; it is mutated and returned.
//   regenerateOptions - options passed to every Regenerate `toString` call.
//   groups            - bookkeeping shared across the whole pattern:
//                       `lastIndex` (count of capturing groups seen so far),
//                       `names` (name → index), `unmatchedReferences`
//                       (name → reference nodes seen before their group) and
//                       an optional `onNamedGroup` callback.
// Throws an `Error` on a duplicate group name (when named groups are being
// rewritten) or on an unknown node type.
const processTerm = (item, regenerateOptions, groups) => {
	switch (item.type) {
		case 'dot':
			if (config.useDotAllFlag) {
				// Leave `.` untouched when `useDotAllFlag` is set.
				break;
			} else if (config.unicode) {
				update(
					item,
					getUnicodeDotSet(config.dotAll).toString(regenerateOptions)
				);
			} else if (config.dotAll) {
				// TODO: consider changing this at the regenerate level.
				update(item, '[\\s\\S]');
			}
			break;
		case 'characterClass':
			item = processCharacterClass(item, regenerateOptions);
			break;
		case 'unicodePropertyEscape':
			if (config.unicodePropertyEscape) {
				update(
					item,
					getUnicodePropertyEscapeSet(item.value, item.negative)
						.toString(regenerateOptions)
				);
			}
			break;
		case 'characterClassEscape':
			update(
				item,
				getCharacterClassEscapeSet(
					item.value,
					config.unicode,
					config.ignoreCase
				).toString(regenerateOptions)
			);
			break;
		case 'group':
			// Only capturing groups advance the running group index.
			if (item.behavior === 'normal') {
				groups.lastIndex++;
			}
			if (item.name && config.namedGroup) {
				const name = item.name.value;
				if (groups.names[name]) {
					throw new Error(
						`Multiple groups with the same name (${ name }) are not allowed.`
					);
				}
				const index = groups.lastIndex;
				// Dropping the name turns the group into a plain capturing group.
				delete item.name;
				groups.names[name] = index;
				if (groups.onNamedGroup) {
					groups.onNamedGroup.call(null, name, index);
				}
				// Resolve references that appeared before this group.
				if (groups.unmatchedReferences[name]) {
					groups.unmatchedReferences[name].forEach(reference => {
						updateNamedReference(reference, index);
					});
					delete groups.unmatchedReferences[name];
				}
			}
			/* falls through */
		case 'alternative':
		case 'disjunction':
		case 'quantifier':
			item.body = item.body.map(term => {
				return processTerm(term, regenerateOptions, groups);
			});
			break;
		case 'value': {
			// Braces keep these declarations out of the shared `switch` scope.
			const codePoint = item.codePoint;
			const set = regenerate(codePoint);
			if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
				const folded = caseFold(codePoint);
				if (folded) {
					set.add(folded);
				}
			}
			update(item, set.toString(regenerateOptions));
			break;
		}
		case 'reference':
			if (item.name) {
				const name = item.name.value;
				const index = groups.names[name];
				if (index) {
					updateNamedReference(item, index);
					break;
				}
				if (!groups.unmatchedReferences[name]) {
					groups.unmatchedReferences[name] = [];
				}
				// Keep track of references used before the corresponding group.
				groups.unmatchedReferences[name].push(item);
			}
			break;
		case 'anchor':
		case 'empty':
			// Nothing to do here. (`group` is already handled above; a second
			// `case 'group'` label here could never be reached.)
			break;
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* istanbul ignore next */
		default:
			throw new Error(`Unknown term type: ${ item.type }`);
	}
	return item;
};
// Module-level settings read by the processing functions. Every field is
// overwritten at the start of each `rewritePattern` call.
const config = {
	// Derived from the `flags` argument.
	ignoreCase: false,
	unicode: false,
	dotAll: false,
	// Derived from the `options` argument.
	useDotAllFlag: false,
	useUnicodeFlag: false,
	unicodePropertyEscape: false,
	namedGroup: false
};
// Rewrite a regular expression pattern.
//   pattern - the pattern source (without delimiters).
//   flags   - optional flags string; `u`, `i` and `s` are inspected here.
//   options - optional object; the properties read are `lookbehind`,
//             `dotAllFlag`, `namedGroup`, `useDotAllFlag`, `useUnicodeFlag`,
//             `unicodePropertyEscape` and `onNamedGroup`.
// Returns the rewritten pattern source generated from the processed tree.
// Throws when `useDotAllFlag` and `dotAllFlag` are both enabled, or when a
// named reference has no matching group.
// Note: the call stores its settings in the shared module-level `config`.
const rewritePattern = (pattern, flags, options) => {
	const hasFlag = (flag) => flags && flags.includes(flag);
	const option = (name) => options && options[name];

	config.unicode = hasFlag('u');
	const regjsparserFeatures = {
		'unicodePropertyEscape': config.unicode,
		'namedGroups': true,
		'lookbehind': option('lookbehind')
	};
	config.ignoreCase = hasFlag('i');
	const supportDotAllFlag = option('dotAllFlag');
	config.dotAll = supportDotAllFlag && hasFlag('s');
	config.namedGroup = option('namedGroup');
	config.useDotAllFlag = option('useDotAllFlag');
	config.useUnicodeFlag = option('useUnicodeFlag');
	config.unicodePropertyEscape = option('unicodePropertyEscape');

	// Rewriting the `s` flag away and keeping it are mutually exclusive.
	if (supportDotAllFlag && config.useDotAllFlag) {
		throw new Error('`useDotAllFlag` and `dotAllFlag` cannot both be true!');
	}

	const regenerateOptions = {
		'hasUnicodeFlag': config.useUnicodeFlag,
		'bmpOnly': !config.unicode
	};
	const groups = {
		'onNamedGroup': option('onNamedGroup'),
		'lastIndex': 0,
		'names': Object.create(null), // { [name]: index }
		'unmatchedReferences': Object.create(null) // { [name]: Array }
	};

	const tree = parse(pattern, flags, regjsparserFeatures);
	// Note: `processTerm` mutates `tree` and `groups`.
	processTerm(tree, regenerateOptions, groups);
	assertNoUnmatchedReferences(groups);
	return generate(tree);
};
module.exports = rewritePattern;
regexpu-core-4.8.0/scripts/ 0000775 0000000 0000000 00000000000 14120103741 0015610 5 ustar 00root root 0000000 0000000 regexpu-core-4.8.0/scripts/character-class-escape-sets.js 0000664 0000000 0000000 00000007775 14120103741 0023437 0 ustar 00root root 0000000 0000000 'use strict';
const fs = require('fs');
const jsesc = require('jsesc');
const regenerate = require('regenerate');
const Zs = require('@unicode/unicode-14.0.0/General_Category/Space_Separator/code-points.js');
const iuMappings = require('../data/iu-mappings.js');
// Return the entry stored in `iuMappings` for `codePoint`, or `false` when
// there is no (truthy) entry.
const caseFold = (codePoint) => {
	const mapped = iuMappings.get(codePoint);
	return mapped ? mapped : false;
};
// Regenerate set holding every Unicode code point; complements for the
// Unicode-mode escape sets are computed against it.
const UNICODE_SET = regenerate().addRange(0x000000, 0x10FFFF);
// Without the `u` flag, the range stops at 0xFFFF, so complements for the
// regular escape sets are computed against the BMP only.
// https://mths.be/es#sec-pattern-semantics
const BMP_SET = regenerate().addRange(0x0000, 0xFFFF);
// Escape letter → set, one table per mode. They are filled by
// `addCharacterClassEscape` and written out as `REGULAR`, `UNICODE` and
// `UNICODE_IGNORE_CASE` respectively.
const ESCAPE_CHARS = {};
const ESCAPE_CHARS_UNICODE = {};
const ESCAPE_CHARS_UNICODE_IGNORE_CASE = {};
// Register a character class escape and its negation in all three tables.
//   lower - the lowercase escape letter (e.g. `d`); its uppercase form is
//           used as the key for the negated escape.
//   set   - the Regenerate set matched by the lowercase escape.
const addCharacterClassEscape = (lower, set) => {
	const upper = lower.toUpperCase();
	ESCAPE_CHARS_UNICODE[lower] = set;
	ESCAPE_CHARS[lower] = set;
	ESCAPE_CHARS[upper] = BMP_SET.clone().remove(set);
	ESCAPE_CHARS_UNICODE[upper] = UNICODE_SET.clone().remove(set);
	// Check if one or more symbols in this set fold to another one. If so,
	// a copy of the set including the mapped symbols is created for use with
	// regular expressions that have both the `u` and `i` flags set.
	const foldedSet = regenerate();
	let hasFoldingSymbols = false;
	for (const codePoint of set.toArray()) {
		const firstFold = caseFold(codePoint);
		if (!firstFold) {
			continue;
		}
		hasFoldingSymbols = true;
		foldedSet.add(firstFold);
		// Follow the mapping one step further.
		const secondFold = caseFold(firstFold);
		if (secondFold) {
			foldedSet.add(secondFold);
		}
	}
	let iuLowerSet = set;
	if (hasFoldingSymbols) {
		iuLowerSet = foldedSet.clone().add(set);
	}
	ESCAPE_CHARS_UNICODE_IGNORE_CASE[lower] = iuLowerSet;
	ESCAPE_CHARS_UNICODE_IGNORE_CASE[upper] = UNICODE_SET.clone().remove(iuLowerSet);
};
// Register a Regenerate set for every existing character class escape.
// https://mths.be/es#sec-characterclassescape

// `\d` and `\D`
const DIGIT_SET = regenerate().addRange('0', '9');
addCharacterClassEscape('d', DIGIT_SET);

// `\s` and `\S`
const WHITE_SPACE_SET = regenerate(
	// https://mths.be/es#sec-white-space
	0x0009,
	0x000B,
	0x000C,
	0x0020,
	0x00A0,
	0xFEFF,
	Zs,
	// https://mths.be/es#sec-line-terminators
	0x000A,
	0x000D,
	0x2028,
	0x2029
);
addCharacterClassEscape('s', WHITE_SPACE_SET);

// `\w` and `\W`
const WORD_SET = regenerate('_')
	.addRange('a', 'z')
	.addRange('A', 'Z')
	.addRange('0', '9');
addCharacterClassEscape('w', WORD_SET);
/*----------------------------------------------------------------------------*/
// Format a code point as an uppercase hexadecimal literal, e.g. `0x1F4A9`.
const codePointToString = (codePoint) => {
	const digits = codePoint.toString(16).toUpperCase();
	return `0x${ digits }`;
};
// Regenerate plugin that turns a set into some JavaScript source code that
// generates that set: lone code points become arguments of `regenerate(…)`
// and every longer run becomes a chained `.addRange(start, end)` call.
regenerate.prototype.toCode = function() {
	const data = this.data;
	const loneCodePoints = [];
	const ranges = [];
	// `data` is read as consecutive `(start, end)` pairs.
	for (let index = 0; index < data.length; index += 2) {
		const start = data[index];
		const end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive.
		if (start === end) {
			loneCodePoints.push(codePointToString(start));
			continue;
		}
		ranges.push(
			`addRange(${ codePointToString(start) }, ${ codePointToString(end) })`
		);
	}
	const head = `regenerate(${ loneCodePoints.join(', ') })`;
	const tail = ranges.length ? `\n\t\t.${ ranges.join('\n\t\t.') }` : '';
	return head + tail;
};
// Produce the source of an `exports.<name> = new Map([...]);` statement with
// one `[key, set]` entry per own key of `object`, where each set is emitted
// via its `toCode()` method.
const stringify = (name, object) => {
	const entries = Object.keys(object).map((character) => {
		const key = jsesc(character, { 'wrap': true });
		const code = object[character].toCode();
		return `[${ key }, ${ code }]`;
	});
	return `exports.${ name } = new Map([\n\t${ entries.join(',\n\t') }\n]);`;
};
// Assemble the generated module: a header followed by the three exported
// maps, separated by blank lines.
const sections = [
	'// Generated using `npm run build`. Do not edit.\n' +
		`'use strict';\n\nconst regenerate = require('regenerate');`
];
sections.push(stringify('REGULAR', ESCAPE_CHARS));
sections.push(stringify('UNICODE', ESCAPE_CHARS_UNICODE));
sections.push(stringify('UNICODE_IGNORE_CASE', ESCAPE_CHARS_UNICODE_IGNORE_CASE));
const source = sections.join('\n\n');
// Save the precompiled sets to a static file.
fs.writeFileSync('data/character-class-escape-sets.js', `${ source }\n`);
regexpu-core-4.8.0/scripts/iu-mappings.js 0000664 0000000 0000000 00000015367 14120103741 0020413 0 ustar 00root root 0000000 0000000 'use strict';
const fs = require('fs');
const _ = require('lodash');
const jsesc = require('jsesc');
// Format a number as an uppercase hexadecimal literal, e.g. `0x17F`.
const hex = (number) => {
	const digits = number.toString(16).toUpperCase();
	return '0x' + digits;
};
// Serialize `map` as a CommonJS module (`module.exports = …;`) and write it
// to `fileName`. Entries are written in ascending key order and numbers are
// emitted in hexadecimal.
const writeMap = (fileName, map) => {
	const entries = Array.from(map);
	// Sort map by key.
	entries.sort(([keyA], [keyB]) => keyA - keyB);
	const serialized = jsesc(new Map(entries), {
		'compact': false,
		'numbers': 'hexadecimal'
	});
	fs.writeFileSync(fileName, `module.exports = ${ serialized };\n`);
};
// Tell whether two code points are an ASCII upper/lower pair: both must be
// below 0x80 and one must map to the other in `oneWayMappings`. ES5 engines
// already know about such mappings, so only one of the two has to be
// included in a case-insensitive regular expression.
const isES5CasedVariant = (a, b) => {
	const bothAscii = a < 0x80 && b < 0x80;
	if (!bothAscii) {
		return false;
	}
	return oneWayMappings.get(a) === b || oneWayMappings.get(b) === a;
};
// Add `value` to the entry for `key` in `map` without overwriting it.
//   - No entry yet: the bare `value` is stored.
//   - Entry already equals or contains `value`: nothing happens.
//   - Otherwise the entry becomes (or grows as) an array of values.
// The optional `callback(existingValue, value)` can veto the addition to an
// existing entry: if it returns a truthy result for any existing value,
// `value` is not added.
const extend = (map, key, value, callback) => {
	if (!map.has(key)) {
		map.set(key, value);
		return;
	}
	const existing = map.get(key);
	if (Array.isArray(existing)) {
		if (existing.indexOf(value) !== -1) {
			return;
		}
		if (callback && existing.some((codePoint) => callback(codePoint, value))) {
			return;
		}
		existing.push(value);
		return;
	}
	if (existing === value) {
		return;
	}
	if (callback && callback(existing, value)) {
		return;
	}
	// Promote the single value to an array now that there are two.
	map.set(key, [existing, value]);
};
// From <https://unicode.org/Public/UCD/latest/ucd/CaseFolding.txt>:
//
// The status field is:
// C: common case folding, common mappings shared by both simple and full
// mappings.
// F: full case folding, mappings that cause strings to grow in length. Multiple
// characters are separated by spaces.
// S: simple case folding, mappings to single characters where different from F.
// T: special case for uppercase I and dotted uppercase I
// - For non-Turkic languages, this mapping is normally not used.
// - For Turkic languages (tr, az), this mapping can be used instead of the
// normal mapping for these characters. Note that the Turkic mappings do
// not maintain canonical equivalence without additional processing.
// See the discussions of case mapping in the Unicode Standard for more
// information.
//
// Usage:
// A. To do a simple case folding, use the mappings with status C + S.
// B. To do a full case folding, use the mappings with status C + F.
const commonMappings = require('@unicode/unicode-14.0.0/Case_Folding/C/code-points.js');
const simpleMappings = require('@unicode/unicode-14.0.0/Case_Folding/S/code-points.js');
// We want the `C` mappings in both directions (i.e. `A` should fold to `a`
// and `a` to `A`), and the `S` mappings in both directions (i.e. `ẞ` should
// fold to `ß` and `ß` to `ẞ`). Let’s start with the simple case folding (in
// one direction) first, then filter the set, and then deal with the inverse.
// The `S` entries are listed after the `C` entries, so for a key present in
// both the `S` value wins.
const oneWayMappings = new Map([
	...commonMappings,
	...simpleMappings
]);
// Note: various code points can fold into the same code point, so it’s not
// possible to simply invert `oneWayMappings` — some entries would be lost in
// the process.
// In case-insignificant matches when `Unicode` is `true` (i.e. when the `u`
// flag is enabled), all characters are implicitly case-folded using the
// simple mapping provided by the Unicode standard immediately before they
// are compared. The simple mapping always maps to a single code point, so it
// does not map, for example, `ß` (U+00DF) to `SS`. It may however map a code
// point outside the Basic Latin range to a character within, for example, `ſ`
// (U+017F) to `s`. Such characters are not mapped if `Unicode` is `false`.
// This prevents Unicode code points such as U+017F and U+212A from matching
// regular expressions such as `/[a‑z]/i`, but they will match `/[a‑z]/ui`.
// https://mths.be/es6#sec-runtime-semantics-canonicalize-abstract-operation
// Get the mappings that are unique to regular expressions that have both the
// `i` and `u` flags set. In addition to the above, this includes all mappings
// for astral code points.
// Build `filteredMappings` from `oneWayMappings`. For every `from → to`
// entry this (1) records another code point that folds to the same `to`, and
// (2) records `from → to` itself when either code point is astral or when a
// case-insensitive `RegExp` in the current engine does not already match
// `to` against `from`; otherwise the mapping is logged as skipped.
const filteredMappings = new Map();
for (const [from, to] of oneWayMappings) {
// Case folding is applied to both the pattern and the string being matched.
// Because of that e.g. `/[A-Z]/iu` matches U+017F and U+212A, just like
// `/[a-z]/iu` would, even though no symbol in the range from `A` to `Z`
// folds to U+017F or U+212A directly. Since we’re only transpiling regular
// expressions and not strings, we have to account for this in regular
// expressions only. This can be done as per this example:
// 1. `oneWayMappings` already maps `S` to `s`. (83 → 115)
// 2. `oneWayMappings` already maps `ſ` to `s`. (383 → 115)
// 3. So, in the generated mappings, make sure `S` maps to `ſ`. (83 → 383)
// Check if there are any other code points that map to the same `to` value.
// NOTE(review): this inner scan makes the whole loop quadratic in the number
// of mappings, and when several other code points share this `to`, each
// `set` below overwrites the previous one so only the last `otherFrom` is
// kept — confirm this is intended.
for (const [otherFrom, otherTo] of oneWayMappings) {
if (otherFrom != from && otherTo == to) {
// Note: we could use `extend` here, but it’s not necessary as there can
// only be a single value for the key `from` at this point.
filteredMappings.set(from, otherFrom);
}
}
if (
// Include astral code points.
(from > 0xFFFF || to > 0xFFFF) ||
// Exclude ES5 mappings as per the above comment.
// https://mths.be/es6#sec-runtime-semantics-canonicalize-abstract-operation
(
// TODO: Make this not depend on the engine in which this build script
// runs. (If V8 has a bug, then the generated data has the same bug.)
!RegExp(String.fromCodePoint(from), 'i').test(String.fromCodePoint(to))
)
) {
// `extend` keeps any value stored by the scan above and adds `to` to it.
extend(filteredMappings, from, to);
} else {
// The engine already handles this pair without the `u` flag; only log it.
const stringFrom = String.fromCodePoint(from);
const stringTo = String.fromCodePoint(to);
const code = `/${
jsesc(stringFrom)
}/i.test(${
jsesc(stringTo, { 'wrap': true })
})`;
console.log(
`Skipping ${ hex(from) } → ${ hex(to) } since ${ code } is already \`true\`.`
);
// The following snippet was used to create https://mths.be/demo/regex-i.
// https://github.com/mathiasbynens/regexpu-core/issues/7#issuecomment-225894534
// console.log(
// `console.assert(${ code }, ${ JSON.stringify(code) });`
// );
}
}
// Create a new map containing all `filteredMappings` and their inverse.
// Additions to an existing entry are skipped when `isES5CasedVariant`
// reports the new value as an ASCII case variant of one already stored.
const iuMappings = new Map();
for (const [from, to] of filteredMappings) {
	// Treat single values and arrays of values uniformly.
	const targets = Array.isArray(to) ? to : [to];
	for (const codePoint of targets) {
		extend(iuMappings, from, codePoint, isES5CasedVariant);
		extend(iuMappings, codePoint, from, isES5CasedVariant);
	}
}
writeMap('data/iu-mappings.js', iuMappings);
regexpu-core-4.8.0/tests/ 0000775 0000000 0000000 00000000000 14120103741 0015263 5 ustar 00root root 0000000 0000000 regexpu-core-4.8.0/tests/tests.js 0000664 0000000 0000000 00000055341 14120103741 0016773 0 ustar 00root root 0000000 0000000 'use strict';
const assert = require('assert');
const regenerate = require('regenerate');
const rewritePattern = require('../rewrite-pattern.js');
const fixtures = require('regexpu-fixtures');
// All BMP code points, and the corresponding pattern source restricted to
// BMP output.
const BMP_SET = regenerate().addRange(0x0000, 0xFFFF);
const BMP_PATTERN = BMP_SET.toString({ bmpOnly: true });
// All Unicode code points, and the corresponding pattern source.
const UNICODE_SET = regenerate().addRange(0x000000, 0x10FFFF);
const UNICODE_PATTERN = UNICODE_SET.toString();
// Fixture-driven tests: every fixture pattern is rewritten once per listed
// flag combination and compared against the expected transpiled source.
describe('rewritePattern', () => {
	for (const fixture of fixtures) {
		const pattern = fixture.pattern;
		for (const flag of fixture.flags) {
			it('rewrites `/' + pattern + '/' + flag + '` correctly', () => {
				// `strictEqual` instead of the legacy, coercing `assert.equal`.
				assert.strictEqual(rewritePattern(pattern, flag), fixture.transpiled);
			});
		}
	}
});
const unicodePropertyEscapeFixtures = [
// https://unicode.org/reports/tr18/#RL1.2 item 1
{
'path': 'General_Category/Uppercase_Letter',
'expressions': [
'gc=Lu',
'gc=Uppercase_Letter',
'General_Category=Lu',
'General_Category=Uppercase_Letter',
'Lu',
'Uppercase_Letter'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 2a
{
'path': 'Script/Greek',
'expressions': [
'sc=Grek',
'sc=Greek',
'Script=Grek',
'Script=Greek'
]
},
{
'path': 'Script/Hiragana',
'expressions': [
'sc=Hira',
'sc=Hiragana',
'Script=Hira',
'Script=Hiragana'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 2b
{
'path': 'Script_Extensions/Greek',
'expressions': [
'scx=Grek',
'scx=Greek',
'Script_Extensions=Grek',
'Script_Extensions=Greek'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 3
{
'path': 'Binary_Property/Alphabetic',
'expressions': [
'Alpha',
'Alphabetic'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 4
{
'path': 'Binary_Property/Uppercase',
'expressions': [
'Upper',
'Uppercase'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 5
{
'path': 'Binary_Property/Lowercase',
'expressions': [
'Lower',
'Lowercase'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 6
{
'path': 'Binary_Property/White_Space',
'expressions': [
'WSpace',
'White_Space'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 7
{
'path': 'Binary_Property/Noncharacter_Code_Point',
'expressions': [
'NChar',
'Noncharacter_Code_Point'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 8
{
'path': 'Binary_Property/Default_Ignorable_Code_Point',
'expressions': [
'DI',
'Default_Ignorable_Code_Point'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 9a
{
'path': 'Binary_Property/Any',
'expressions': [
'Any'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 9b
{
'path': 'Binary_Property/ASCII',
'expressions': [
'ASCII'
]
},
// https://unicode.org/reports/tr18/#RL1.2 item 9c
{
'path': 'Binary_Property/Assigned',
'expressions': [
'Assigned'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/ASCII_Hex_Digit',
'expressions': [
'ASCII_Hex_Digit',
'AHex'
]
},
// https://unicode.org/reports/tr18/#RL2.7
// {
// 'path': 'Bidi_Class/Arabic_Letter',
// 'expressions': [
// 'bc=AL',
// 'bc=Arabic_Letter',
// 'Bidi_Class=AL',
// 'Bidi_Class=Arabic_Letter'
// ]
// },
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Bidi_Control',
'expressions': [
'Bidi_C',
'Bidi_Control'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Bidi_Mirrored',
'expressions': [
'Bidi_M',
'Bidi_Mirrored'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Case_Ignorable',
'expressions': [
'CI',
'Case_Ignorable',
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Cased',
'expressions': [
'Cased'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Changes_When_NFKC_Casefolded',
'expressions': [
'CWKCF',
'Changes_When_NFKC_Casefolded'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Changes_When_Casefolded',
'expressions': [
'CWCF',
'Changes_When_Casefolded'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Changes_When_Casemapped',
'expressions': [
'CWCM',
'Changes_When_Casemapped'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Changes_When_Lowercased',
'expressions': [
'CWL',
'Changes_When_Lowercased'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Changes_When_Titlecased',
'expressions': [
'CWT',
'Changes_When_Titlecased'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Changes_When_Uppercased',
'expressions': [
'CWU',
'Changes_When_Uppercased'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Dash',
'expressions': [
'Dash'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Deprecated',
'expressions': [
'Dep',
'Deprecated'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Diacritic',
'expressions': [
'Dia',
'Diacritic'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Extender',
'expressions': [
'Ext',
'Extender'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Grapheme_Base',
'expressions': [
'Gr_Base',
'Grapheme_Base'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Grapheme_Extend',
'expressions': [
'Gr_Ext',
'Grapheme_Extend'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Hex_Digit',
'expressions': [
'Hex',
'Hex_Digit'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/ID_Continue',
'expressions': [
'IDC',
'ID_Continue'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/ID_Start',
'expressions': [
'IDS',
'ID_Start'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Ideographic',
'expressions': [
'Ideo',
'Ideographic'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/IDS_Binary_Operator',
'expressions': [
'IDSB',
'IDS_Binary_Operator'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/IDS_Trinary_Operator',
'expressions': [
'IDST',
'IDS_Trinary_Operator'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Join_Control',
'expressions': [
'Join_C',
'Join_Control'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Logical_Order_Exception',
'expressions': [
'LOE',
'Logical_Order_Exception'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Math',
'expressions': [
'Math'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Pattern_Syntax',
'expressions': [
'Pat_Syn',
'Pattern_Syntax'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Pattern_White_Space',
'expressions': [
'Pat_WS',
'Pattern_White_Space'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Quotation_Mark',
'expressions': [
'QMark',
'Quotation_Mark'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Radical',
'expressions': [
'Radical'
]
},
{
'path': 'Binary_Property/Regional_Indicator',
'expressions': [
'RI',
'Regional_Indicator'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Soft_Dotted',
'expressions': [
'SD',
'Soft_Dotted'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Sentence_Terminal',
'expressions': [
'STerm',
'Sentence_Terminal'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Terminal_Punctuation',
'expressions': [
'Term',
'Terminal_Punctuation'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Unified_Ideograph',
'expressions': [
'UIdeo',
'Unified_Ideograph'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/Variation_Selector',
'expressions': [
'VS',
'Variation_Selector'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/XID_Continue',
'expressions': [
'XIDC',
'XID_Continue'
]
},
// https://unicode.org/reports/tr18/#RL2.7
{
'path': 'Binary_Property/XID_Start',
'expressions': [
'XIDS',
'XID_Start'
]
},
// https://unicode.org/reports/tr18/#RL2.7
// {
// 'path': 'Bidi_Paired_Bracket_Type/Open',
// 'expressions': [
// 'bpt=o',
// 'bpt=Open',
// 'Bidi_Paired_Bracket_Type=o',
// 'Bidi_Paired_Bracket_Type=Open'
// ]
// },
// https://unicode.org/reports/tr51/
{
'path': 'Binary_Property/Emoji',
'expressions': [
'Emoji'
]
},
// https://unicode.org/reports/tr51/
{
'path': 'Binary_Property/Emoji_Component',
'expressions': [
'Emoji_Component'
]
},
// https://unicode.org/reports/tr51/
{
'path': 'Binary_Property/Emoji_Modifier',
'expressions': [
'Emoji_Modifier'
]
},
// https://unicode.org/reports/tr51/
{
'path': 'Binary_Property/Emoji_Modifier_Base',
'expressions': [
'Emoji_Modifier_Base'
]
},
// https://unicode.org/reports/tr51/
{
'path': 'Binary_Property/Emoji_Presentation',
'expressions': [
'Emoji_Presentation'
]
},
// https://unicode.org/reports/tr51/proposed.html
{
'path': 'Binary_Property/Extended_Pictographic',
'expressions': [
'Extended_Pictographic'
]
},
{
'path': 'Script_Extensions/Yezidi',
'expressions': [
'scx=Yezi',
'scx=Yezidi',
'Script_Extensions=Yezi',
'Script_Extensions=Yezidi',
]
},
{
'path': 'Script_Extensions/Toto',
'expressions': [
'scx=Toto',
'Script_Extensions=Toto',
]
},
];
/**
 * Builds the reference patterns that a property fixture is compared against.
 *
 * @param {string} path - Property path inside the `@unicode/unicode-14.0.0`
 *   package, e.g. `Binary_Property/ASCII`.
 * @returns {{p: string, P: string}} `p` is the stringified `regenerate` set of
 *   the property's code points; `P` is the stringified copy of `UNICODE_SET`
 *   with those code points removed.
 */
const getPropertyValuePattern = (path) => {
	const modulePath = `@unicode/unicode-14.0.0/${ path }/code-points.js`;
	const propertyCodePoints = require(modulePath);
	const matching = regenerate(propertyCodePoints).toString();
	// Clone first so the shared `UNICODE_SET` is never modified.
	const nonMatching = UNICODE_SET.clone().remove(propertyCodePoints).toString();
	return {
		'p': matching,
		'P': nonMatching
	};
};
// Tests for the `unicodePropertyEscape` option: every fixture expression is
// checked in both its `\p{…}` and `\P{…}` form, followed by hand-written cases
// for composite patterns and for inputs that must be rejected.
describe('unicodePropertyEscapes', () => {
	const features = {
		'unicodePropertyEscape': true
	};
	for (const fixture of unicodePropertyEscapeFixtures) {
		const expected = getPropertyValuePattern(fixture.path);
		for (const pattern of fixture.expressions) {
			const p = `\\p{${ pattern }}`;
			it('rewrites `/' + p + '/u` correctly', () => {
				const transpiled = rewritePattern(p, 'u', features);
				// The output is accepted with or without a wrapping
				// non-capturing group.
				assert(
					transpiled === expected.p ||
					transpiled === '(?:' + expected.p + ')'
				);
			});
			const P = `\\P{${ pattern }}`;
			it('rewrites `/' + P + '/u` correctly', () => {
				const transpiled = rewritePattern(P, 'u', features);
				assert(
					transpiled === expected.P ||
					transpiled === '(?:' + expected.P + ')'
				);
			});
		}
	}
	it('transpiles Unicode property escapes within various constructions', () => {
		assert.equal(
			rewritePattern('\\p{ASCII_Hex_Digit}', 'u', features),
			'[0-9A-Fa-f]'
		);
		assert.equal(
			rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', features),
			'(?:\\uD811[\\uDC00-\\uDE46])'
		);
		assert.equal(
			rewritePattern('\\p{ASCII_Hex_Digit}+', 'u', features),
			'[0-9A-Fa-f]+'
		);
		assert.equal(
			rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}+', 'u', features),
			'(?:\\uD811[\\uDC00-\\uDE46])+'
		);
		assert.equal(
			rewritePattern('[\\p{ASCII_Hex_Digit}_]', 'u', features),
			'[0-9A-F_a-f]'
		);
		assert.equal(
			rewritePattern('[^\\p{ASCII_Hex_Digit}_]', 'u', features),
			'(?:(?![0-9A-F_a-f])[\\s\\S])'
		);
		assert.equal(
			rewritePattern('[\\P{Script_Extensions=Anatolian_Hieroglyphs}]', 'u', features),
			'(?:[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uD810\\uD812-\\uDBFF][\\uDC00-\\uDFFF]|\\uD811[\\uDE47-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])'
		);
		assert.equal(
			rewritePattern('[\\p{Script_Extensions=Anatolian_Hieroglyphs}_]', 'u', features),
			'(?:_|\\uD811[\\uDC00-\\uDE46])'
		);
		assert.equal(
			rewritePattern('[\\P{Script_Extensions=Anatolian_Hieroglyphs}_]', 'u', features),
			'(?:[\\0-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uD810\\uD812-\\uDBFF][\\uDC00-\\uDFFF]|\\uD811[\\uDE47-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])'
		);
		assert.equal(
			rewritePattern('(?:\\p{ASCII_Hex_Digit})', 'u', features),
			'(?:[0-9A-Fa-f])'
		);
		assert.equal(
			rewritePattern('(?:\\p{Script_Extensions=Anatolian_Hieroglyphs})', 'u', features),
			'(?:(?:\\uD811[\\uDC00-\\uDE46]))'
		);
		assert.equal(
			rewritePattern('(?:\\p{Script_Extensions=Wancho})', 'u', features),
			'(?:(?:\\uD838[\\uDEC0-\\uDEF9\\uDEFF]))'
		);
	});
	it('throws on unknown binary properties', () => {
		assert.throws(() => {
			rewritePattern('\\p{UnknownBinaryProperty}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{UnknownBinaryProperty}', 'u', features);
		}, Error);
	});
	it('throws on explicitly unsupported properties', () => {
		// https://github.com/tc39/proposal-regexp-unicode-property-escapes/issues/27
		assert.throws(() => {
			rewritePattern('\\P{Composition_Exclusion}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\p{Expands_On_NFC}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\p{Expands_On_NFD}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\p{Expands_On_NFKC}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\p{Expands_On_NFKD}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\p{FC_NFKC_Closure}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\p{Full_Composition_Exclusion}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Grapheme_Link}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Hyphen}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Other_Alphabetic}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Other_Default_Ignorable_Code_Point}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Other_Grapheme_Extend}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Other_ID_Continue}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Other_ID_Start}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Other_Lowercase}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Other_Math}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Other_Uppercase}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{Prepended_Concatenation_Mark}', 'u', features);
		}, Error);
	});
	it('throws on non-binary properties without a value', () => {
		assert.throws(() => {
			rewritePattern('\\p{General_Category}', 'u', features);
		}, Error);
	});
	it('throws on unknown property values', () => {
		assert.throws(() => {
			rewritePattern('\\p{General_Category=UnknownCategory}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{General_Category=UnknownCategory}', 'u', features);
		}, Error);
	});
	it('throws when loose matching is attempted', () => {
		assert.throws(() => {
			rewritePattern('\\p{gc=uppercaseletter}', 'u', features);
		}, Error);
		// The backslash must be doubled: a lone `\p` inside a string literal
		// collapses to `p`, so the pattern would not contain a property
		// escape at all.
		assert.throws(() => {
			rewritePattern('\\p{Block=Superscripts and Subscripts}', 'u', features);
		}, Error);
		assert.throws(() => {
			rewritePattern('\\P{_-_lOwEr_C-A_S-E_-_}', 'u', features);
		}, Error);
	});
	it('simplifies the output using Unicode code point escapes when `useUnicodeFlag` is enabled', () => {
		assert.equal(
			rewritePattern('\\p{Script_Extensions=Anatolian_Hieroglyphs}', 'u', {
				'unicodePropertyEscape': true,
				'useUnicodeFlag': true
			}),
			'[\\u{14400}-\\u{14646}]'
		);
	});
	it('should not transpile unicode property when unicodePropertyEscape is not enabled', () => {
		assert.equal(
			rewritePattern('\\p{ASCII_Hex_Digit}\\P{ASCII_Hex_Digit}', 'u'),
			'\\p{ASCII_Hex_Digit}\\P{ASCII_Hex_Digit}'
		);
	});
	it('should transpile to minimal case-insensitive set', () => {
		// The input here is the literal character U+03B8, not an escape
		// sequence inside the pattern.
		assert.equal(
			rewritePattern('\u03B8', 'iu'),
			'[\\u03B8\\u03F4]'
		);
		assert.equal(
			rewritePattern('\u03B8', 'iu', {
				'useUnicodeFlag': true
			}),
			'\\u03B8'
		);
	});
});
// Inputs and expected outputs for the `dotAllFlag` test suite. Each entry is a
// bare `.` pattern combined with a set of flags that includes `s`.
const dotAllFlagFixtures = [
	// Without the `u` flag.
	{ pattern: '.', flags: 's', expected: '[\\s\\S]' },
	{ pattern: '.', flags: 'gimsy', expected: '[\\s\\S]' },
	// With the `u` flag the expectation is the shared `UNICODE_PATTERN`.
	{ pattern: '.', flags: 'su', expected: UNICODE_PATTERN },
	{ pattern: '.', flags: 'gimsuy', expected: UNICODE_PATTERN }
];
// Runs every entry of `dotAllFlagFixtures` through `rewritePattern` with the
// `dotAllFlag` option enabled.
describe('dotAllFlag', () => {
	const features = { 'dotAllFlag': true };
	for (const fixture of dotAllFlagFixtures) {
		const { pattern, flags } = fixture;
		it('rewrites `/' + pattern + '/' + flags + '` correctly', () => {
			const expected = fixture.expected;
			const transpiled = rewritePattern(pattern, flags, features);
			// An output wrapped in a non-capturing group is accepted as-is.
			const wrapped = '(?:' + expected + ')';
			if (transpiled === wrapped) {
				return;
			}
			assert.strictEqual(transpiled, expected);
		});
	}
});
// Inputs and expected outputs for the `useDotAllFlag` test suite: the `.`
// pattern is expected to come back unchanged.
const useDotAllFlagFixtures = [
	{ pattern: '.', flags: 'su', expected: '.' }
];
// Runs every entry of `useDotAllFlagFixtures` through `rewritePattern` with
// the `useDotAllFlag` option enabled, then checks that combining it with
// `dotAllFlag` is rejected.
describe('useDotAllFlag', () => {
	const features = { 'useDotAllFlag': true };
	for (const fixture of useDotAllFlagFixtures) {
		const { pattern, flags } = fixture;
		it('rewrites `/' + pattern + '/' + flags + '` correctly', () => {
			const expected = fixture.expected;
			const transpiled = rewritePattern(pattern, flags, features);
			// An output wrapped in a non-capturing group is accepted as-is.
			const wrapped = '(?:' + expected + ')';
			if (transpiled === wrapped) {
				return;
			}
			assert.strictEqual(transpiled, expected);
		});
	}
	it('should throw when both `useDotAllFlag` and `dotAll` is true', () => {
		const conflictingOptions = {
			useDotAllFlag: true,
			dotAllFlag: true
		};
		const rewriteWithConflict = () => {
			rewritePattern('.', 's', conflictingOptions);
		};
		// The third argument is the failure message reported when nothing is
		// thrown; it is not compared against the thrown error's message.
		assert.throws(rewriteWithConflict, Error, '`useDotAllFlag` and `dotAllFlag` cannot both be true!');
	});
});
// Inputs and expected outputs for the `namedGroup` test suite. An entry may
// carry `expectedGroups` (the `[name, index]` pairs reported through
// `onNamedGroup`) and extra `options` forwarded to `rewritePattern`.
const namedGroupFixtures = [
	{
		'pattern': '(?<name>)\\k<name>',
		'flags': '',
		'expected': '()\\1',
		'expectedGroups': [
			['name', 1]
		]
	},
	{
		'pattern': '(?<name1>)(?<name2>)\\k<name1>\\k<name2>',
		'flags': '',
		'expected': '()()\\1\\2',
		'expectedGroups': [
			['name1', 1],
			['name2', 2]
		]
	},
	{
		'pattern': '()(?<name>)\\k<name>',
		'flags': '',
		'expected': '()()\\2',
		'expectedGroups': [
			['name', 2]
		]
	},
	{
		'pattern': '(?<name>)()\\1',
		'flags': '',
		'expected': '()()\\1'
	},
	{
		'pattern': '\\k<name>\\k<name>(?<name>)\\k<name>',
		'flags': '',
		'expected': '\\1\\1()\\1'
	},
	{
		'pattern': '(?<name>\\k<name>)',
		'flags': '',
		'expected': '(\\1)'
	},
	{
		'pattern': '(?<$𐒤>a)b\\k<$𐒤>',
		'flags': '',
		'expected': '(a)b\\1'
	},
	// NOTE(review): the assertion groups in this entry were reconstructed
	// after the angle-bracket sequences were lost; confirm against the
	// upstream test suite.
	{
		'pattern': '(?<=a)(?<!b)(?=c)(?!d)(?:e)(?<name>f)\\k<name>',
		'flags': '',
		'expected': '(?<=a)(?<!b)(?=c)(?!d)(?:e)(f)\\1',
		'expectedGroups': [
			['name', 1]
		],
		'options': {
			'lookbehind': true
		}
	}
];
// Runs every entry of `namedGroupFixtures` through `rewritePattern` with the
// `namedGroup` option enabled, then covers the optional callback, duplicate
// names, dangling references and the disabled-option pass-through.
describe('namedGroup', () => {
	for (const fixture of namedGroupFixtures) {
		const {
			pattern,
			flags,
			expected,
			expectedGroups,
			options = {}
		} = fixture;
		const groups = [];
		// Copy into a fresh object so the fixture's own `options` is not
		// modified.
		const rewriteOptions = Object.assign({}, options, {
			'namedGroup': true,
			'onNamedGroup': (name, index) => {
				groups.push([ name, index ]);
			}
		});
		it('rewrites `/' + pattern + '/' + flags + '` correctly', () => {
			const transpiled = rewritePattern(pattern, flags, rewriteOptions);
			assert.strictEqual(transpiled, expected);
			if (expectedGroups) {
				assert.deepStrictEqual(groups, expectedGroups);
			}
		});
	}
	it('onNamedGroup is optional', () => {
		let transpiled;
		const expected = '()';
		assert.doesNotThrow(() => {
			transpiled = rewritePattern('(?<name>)', '', {
				'namedGroup': true
			});
		});
		assert.strictEqual(transpiled, expected);
	});
	it('multiple groups with the same name are disallowed', () => {
		assert.throws(() => {
			rewritePattern('(?<name>)(?<name>)', '', {
				'namedGroup': true
			});
		});
	});
	it('named references must reference a group', () => {
		assert.throws(() => {
			rewritePattern('\\k<name>', '', {
				'namedGroup': true
			});
		});
	});
	it('should not transpile when namedGroup is not enabled', () => {
		let transpiled;
		const expected = '(?<name>)';
		assert.doesNotThrow(() => {
			transpiled = rewritePattern('(?<name>)', '');
		});
		// Actual value first, so a failure report labels the two sides
		// correctly.
		assert.strictEqual(transpiled, expected);
	});
});
// Inputs and expected outputs for the `lookbehind` test suite: both patterns
// are expected to come back unchanged.
const lookbehindFixtures = [
	{ pattern: '(?<=a)b', flags: '', expected: '(?<=a)b' },
	{ pattern: '(?<=.)a', flags: '', expected: '(?<=.)a' }
];
// Runs every entry of `lookbehindFixtures` through `rewritePattern` with the
// `lookbehind` option enabled.
describe('lookbehind', () => {
	for (const fixture of lookbehindFixtures) {
		const pattern = fixture.pattern;
		const flags = fixture.flags;
		const expected = fixture.expected;
		it('rewrites `/' + pattern + '/' + flags + '` correctly', () => {
			const transpiled = rewritePattern(pattern, flags, {
				'lookbehind': true
			});
			assert.strictEqual(transpiled, expected);
		});
	}
});
// Inputs and expected outputs for the `character classes` test suite. The
// same three negated classes (`K`, `k`, and U+212A KELVIN SIGN) are repeated
// for each combination of flags and the `useUnicodeFlag` option.
const characterClassFixtures = [
	// Flags `iu`, `useUnicodeFlag` not set.
	{
		// LATIN CAPITAL LETTER K
		'pattern': '[^K]',
		'flags': 'iu',
		'expected': '(?![K\\u212A])[\\s\\S]'
	},
	{
		// LATIN SMALL LETTER K
		'pattern': '[^k]',
		'flags': 'iu',
		'expected': '(?![k\\u212A])[\\s\\S]'
	},
	{
		// KELVIN SIGN
		'pattern': '[^\u212a]',
		'flags': 'iu',
		'expected': '(?![K\\u212A])[\\s\\S]'
	},
	// Flags `iu`, `useUnicodeFlag` enabled.
	{
		// LATIN CAPITAL LETTER K
		'pattern': '[^K]',
		'flags': 'iu',
		'expected': '(?!K)[\\s\\S]',
		'useUnicodeFlag': true
	},
	{
		// LATIN SMALL LETTER K
		'pattern': '[^k]',
		'flags': 'iu',
		'expected': '(?!k)[\\s\\S]',
		'useUnicodeFlag': true
	},
	{
		// KELVIN SIGN
		'pattern': '[^\u212a]',
		'flags': 'iu',
		'expected': '(?!\\u212A)[\\s\\S]',
		'useUnicodeFlag': true
	},
	// Flags `u`, `useUnicodeFlag` not set.
	{
		// LATIN CAPITAL LETTER K
		'pattern': '[^K]',
		'flags': 'u',
		'expected': '(?!K)[\\s\\S]'
	},
	{
		// LATIN SMALL LETTER K
		'pattern': '[^k]',
		'flags': 'u',
		'expected': '(?!k)[\\s\\S]'
	},
	{
		// KELVIN SIGN
		'pattern': '[^\u212a]',
		'flags': 'u',
		'expected': '(?!\\u212A)[\\s\\S]'
	},
	// Flags `u`, `useUnicodeFlag` enabled.
	{
		// LATIN CAPITAL LETTER K
		'pattern': '[^K]',
		'flags': 'u',
		'expected': '(?!K)[\\s\\S]',
		'useUnicodeFlag': true
	},
	{
		// LATIN SMALL LETTER K
		'pattern': '[^k]',
		'flags': 'u',
		'expected': '(?!k)[\\s\\S]',
		'useUnicodeFlag': true
	},
	{
		// KELVIN SIGN
		'pattern': '[^\u212a]',
		'flags': 'u',
		'expected': '(?!\\u212A)[\\s\\S]',
		'useUnicodeFlag': true
	}
];
// Runs every entry of `characterClassFixtures` through `rewritePattern`,
// forwarding the fixture's `useUnicodeFlag` setting.
describe('character classes', () => {
	for (const fixture of characterClassFixtures) {
		const pattern = fixture.pattern;
		const flags = fixture.flags;
		const useUnicodeFlag = fixture.useUnicodeFlag;
		it('rewrites `/' + pattern + '/' + flags + '` with' + (useUnicodeFlag ? '' : 'out') + ' unicode correctly', () => {
			const transpiled = rewritePattern(pattern, flags, {
				'useUnicodeFlag': useUnicodeFlag
			});
			const expected = fixture.expected;
			// An output wrapped in a non-capturing group is accepted as-is.
			if (transpiled !== '(?:' + expected + ')') {
				assert.strictEqual(transpiled, expected);
			}
		});
	}
});