package/package.json000644 000765 000024 0000000740 12655445100013020 0ustar00000000 000000 { "name": "addressparser", "version": "1.0.1", "description": "Parse e-mail addresses", "main": "lib/addressparser.js", "repository": { "type": "git", "url": "https://github.com/andris9/addressparser.git" }, "author": "Andris Reinman", "license": "MIT", "scripts": { "test": "grunt" }, "devDependencies": { "chai": "^3.5.0", "grunt": "^0.4.5", "grunt-eslint": "^17.3.1", "grunt-mocha-test": "^0.12.7", "mocha": "^2.4.5" } } package/.npmignore000644 000765 000024 0000000053 12655443137012536 0ustar00000000 000000 .travis.yml .eslintrc.js test Gruntfile.js package/README.md000644 000765 000024 0000003173 12655443137012024 0ustar00000000 000000 # addressparser Parse e-mail address fields. Input can be a single address (`"andris@kreata.ee"`), a formatted address (`"Andris Reinman <andris@kreata.ee>"`), comma separated list of addresses (`"andris@kreata.ee, andris.reinman@kreata.ee"`), an address group (`"disclosed-recipients:andris@kreata.ee;"`) or a mix of all the formats. In addition to comma the semicolon is treated as the list delimiter as well (except when used in the group syntax), so a value `"andris@kreata.ee; andris.reinman@kreata.ee"` is identical to `"andris@kreata.ee, andris.reinman@kreata.ee"`. ## Installation Install with npm ``` npm install addressparser ``` ## Usage Include the module ```javascript var addressparser = require('addressparser'); ``` Parse some address strings with `addressparser(field)` ```javascript var addresses = addressparser('andris <andris@tr.ee>'); console.log(addresses); // [{name: "andris", address:"andris@tr.ee"}] ``` And when using groups ```javascript addressparser('Composers:"Bach, Sebastian" <sebu@example.com>, mozart@example.com (Mozzie);'); ``` the result would be ``` [ { name: "Composers", group: [ { address: "sebu@example.com", name: "Bach, Sebastian" }, { address: "mozart@example.com", name: "Mozzie" } ] } ] ``` > Be prepared though that groups might be nested. 
## Notes This module does not decode any mime-word or punycode encoded strings. It is only a basic parser for the base data, so you need to decode the encoded parts yourself afterwards. ## License **MIT**package/LICENSE000644 000765 000024 0000001647 12655443137011556 0ustar00000000 000000 Copyright (c) 2014-2016 Andris Reinman Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// package/lib/addressparser.js000644 000765 000024 0000017506 12655445421014516 0ustar00000000 000000
'use strict';

// expose to the world (guarded so the file can also be evaluated in an
// environment that has no CommonJS `module` object)
if (typeof module !== 'undefined' && module.exports) {
    module.exports = addressparser;
}

/**
 * Parses structured e-mail addresses from an address field
 *
 * Example:
 *
 *    'Name <address@domain>'
 *
 * will be converted to
 *
 *     [{name: 'Name', address: 'address@domain'}]
 *
 * Both "," and ";" operator tokens separate list entries. A group
 * ("name: member, member;") is returned as {name, group: [...]}.
 *
 * @param {String} str Address field
 * @return {Array} An array of address objects
 */
function addressparser(str) {
    var tokenizer = new Tokenizer(str);
    var tokens = tokenizer.tokenize();

    var addresses = [];
    var address = [];
    var parsedAddresses = [];

    // Split the flat token list into one token list per address
    tokens.forEach(function (token) {
        if (token.type === 'operator' && (token.value === ',' || token.value === ';')) {
            if (address.length) {
                addresses.push(address);
            }
            address = [];
        } else {
            address.push(token);
        }
    });

    if (address.length) {
        addresses.push(address);
    }

    addresses.forEach(function (addressTokens) {
        var handled = _handleAddress(addressTokens);
        if (handled.length) {
            parsedAddresses = parsedAddresses.concat(handled);
        }
    });

    return parsedAddresses;
}

/**
 * Converts tokens for a single address into address objects
 *
 * @param {Array} tokens Tokens that belong to one list entry
 * @return {Array} Array holding the resulting address (or group) object
 */
function _handleAddress(tokens) {
    var token;
    var isGroup = false;
    var state = 'text';
    var address;
    var addresses = [];
    var data = {
        address: [],
        comment: [],
        group: [],
        text: []
    };
    var i;
    var len;

    // Filter out <addresses>, (comments) and regular text
    for (i = 0, len = tokens.length; i < len; i++) {
        token = tokens[i];
        if (token.type === 'operator') {
            switch (token.value) {
                case '<':
                    state = 'address';
                    break;
                case '(':
                    state = 'comment';
                    break;
                case ':':
                    state = 'group';
                    isGroup = true;
                    break;
                default:
                    // closing operators and quotes switch back to plain text
                    state = 'text';
            }
        } else if (token.value) {
            if (state === 'address') {
                // handle use case where unquoted name includes a "<"
                // Apple Mail truncates everything between an unexpected < and an address
                // and so will we
                token.value = token.value.replace(/^[^<]*<\s*/, '');
            }
            data[state].push(token.value);
        }
    }

    // If there is no text but a comment, replace the two
    if (!data.text.length && data.comment.length) {
        data.text = data.comment;
        data.comment = [];
    }

    if (isGroup) {
        // http://tools.ietf.org/html/rfc2822#appendix-A.1.3
        data.text = data.text.join(' ');
        addresses.push({
            // an unnamed group keeps `name` undefined, as before
            name: data.text || undefined,
            // the group body is a single text token, so it is parsed recursively
            group: data.group.length ? addressparser(data.group.join(',')) : []
        });
        return addresses;
    }

    // If no address was found, try to detect one from regular text
    if (!data.address.length && data.text.length) {
        for (i = data.text.length - 1; i >= 0; i--) {
            if (data.text[i].match(/^[^@\s]+@[^@\s]+$/)) {
                data.address = data.text.splice(i, 1);
                break;
            }
        }

        // Moves the first address-like substring out of a text chunk
        var _regexHandler = function (match) {
            if (!data.address.length) {
                data.address = [match.trim()];
                return ' ';
            } else {
                return match;
            }
        };

        // still no address
        if (!data.address.length) {
            for (i = data.text.length - 1; i >= 0; i--) {
                // the pattern tolerates more than one @ in the address part
                data.text[i] = data.text[i].replace(/\s*\b[^@\s]+@[^\s]+\b\s*/, _regexHandler).trim();
                if (data.address.length) {
                    break;
                }
            }
        }
    }

    // If there still is no text but a comment exists, replace the two
    if (!data.text.length && data.comment.length) {
        data.text = data.comment;
        data.comment = [];
    }

    // Keep only the first address occurrence, push others to regular text
    if (data.address.length > 1) {
        data.text = data.text.concat(data.address.splice(1));
    }

    // Join values with spaces
    data.text = data.text.join(' ');
    data.address = data.address.join(' ');

    address = {
        address: data.address || data.text || '',
        name: data.text || data.address || ''
    };

    // Only one value was found: decide whether it is an address or a name
    if (address.address === address.name) {
        if ((address.address || '').match(/@/)) {
            address.name = '';
        } else {
            address.address = '';
        }
    }

    addresses.push(address);
    return addresses;
}

/**
 * Creates a Tokenizer object for tokenizing address field strings
 *
 * @constructor
 * @param {String} str Address field string
 */
function Tokenizer(str) {
    this.str = (str || '').toString();
    this.operatorCurrent = '';
    this.operatorExpecting = '';
    this.node = null;
    this.escaped = false;

    this.list = [];
}

/**
 * Operator tokens and which tokens are expected to end the sequence
 */
Tokenizer.prototype.operators = {
    '"': '"',
    '(': ')',
    '<': '>',
    ',': '',
    ':': ';',
    // Semicolons are not a legal delimiter per the RFC2822 grammar other
    // than for terminating a group, but they are also not valid for any
    // other use in this context. Given that some mail clients have
    // historically allowed the semicolon as a delimiter equivalent to the
    // comma in their UI, it makes sense to treat them the same as a comma
    // when used outside of a group.
    ';': ''
};

/**
 * Tokenizes the original input string
 *
 * @return {Array} An array of operator|text tokens
 */
Tokenizer.prototype.tokenize = function () {
    var chr;
    var list = [];

    for (var i = 0, len = this.str.length; i < len; i++) {
        chr = this.str.charAt(i);
        this.checkChar(chr);
    }

    // Trim every token and drop the ones that end up empty
    this.list.forEach(function (node) {
        node.value = (node.value || '').toString().trim();
        if (node.value) {
            list.push(node);
        }
    });

    return list;
};

/**
 * Checks if a character is an operator or text and acts accordingly
 *
 * A backslash escapes the next character, which is then always plain text:
 * an escaped operator, an escaped backslash or an escaped closing delimiter
 * is emitted without the backslash and never opens or closes a sequence.
 * For any other escaped character the backslash is kept in the text.
 *
 * @param {String} chr Character from the address field
 */
Tokenizer.prototype.checkChar = function (chr) {
    if (this.escaped) {
        // escaped characters are never operators, fall through to text handling
    } else if (chr === '\\') {
        this.escaped = true;
        return;
    } else if (this.operatorExpecting && chr === this.operatorExpecting) {
        // closing operator of the current sequence
        this.list.push({
            type: 'operator',
            value: chr
        });
        this.node = null;
        this.operatorExpecting = '';
        return;
    } else if (!this.operatorExpecting && chr in this.operators) {
        // operators are only recognized outside of an open sequence
        this.list.push({
            type: 'operator',
            value: chr
        });
        this.node = null;
        this.operatorExpecting = this.operators[chr];
        return;
    }

    if (!this.node) {
        this.node = {
            type: 'text',
            value: ''
        };
        this.list.push(this.node);
    }

    if (this.escaped) {
        var isSpecial = chr in this.operators || chr === '\\' ||
            (this.operatorExpecting !== '' && chr === this.operatorExpecting);
        if (!isSpecial) {
            // keep the backslash for characters that did not need escaping
            this.node.value += '\\';
        }
    }

    this.node.value += chr;
    this.escaped = false;
};

// package/CHANGELOG.md000644 000765 000024 0000001040 12655445252012345 0ustar00000000 000000 # Changelog ## v1.0.1 2016-02-06 * If the input string includes an unexpected < which messes up address part, then truncate unexpected data (similar to OSX Mail) ## v1.0.0 2016-01-11 * Start using semver compatible versioning scheme, starting from v1.0.0 * Replaced jshint with eslint * Dropped node 0.8 from the test targets. Should still work though ## v0.3.2 2015-01-07 * Added changelog * Allow semicolon (;) as address separator in addition to comma (,). Backport from https://github.com/whiteout-io/addressparser/pull/5