diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..f2a529eb --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +node_modules +.DS_Store +coverage \ No newline at end of file diff --git a/.jscs.json b/.jscs.json new file mode 100644 index 00000000..49a7ad71 --- /dev/null +++ b/.jscs.json @@ -0,0 +1,145 @@ +{ + "requireCurlyBraces": [ + "if", + "else", + "for", + "while", + "do", + "try", + "catch" + ], + "requireSpaceAfterKeywords": [ + "if", + "else", + "for", + "while", + "do", + "switch", + "return", + "try", + "catch" + ], + "requireSpaceBeforeBlockStatements": true, + "requireParenthesesAroundIIFE": true, + "requireSpacesInConditionalExpression": true, + "requireSpacesInFunctionExpression": { + "beforeOpeningCurlyBrace": true + }, + "requireSpacesInAnonymousFunctionExpression": { + "beforeOpeningRoundBrace": true, + "beforeOpeningCurlyBrace": true + }, + "requireSpacesInNamedFunctionExpression": { + "beforeOpeningRoundBrace": true, + "beforeOpeningCurlyBrace": true + }, + "requireSpacesInFunctionExpression": { + "beforeOpeningCurlyBrace": true + }, + "requireMultipleVarDecl": true, + "requireBlocksOnNewline": true, + "disallowPaddingNewlinesInBlocks": true, + "disallowEmptyBlocks": true, + "disallowSpacesInsideObjectBrackets": true, + "disallowSpacesInsideArrayBrackets": true, + "disallowSpacesInsideParentheses": true, + "requireSpacesInsideObjectBrackets": "all", + "disallowDanglingUnderscores": true, + "requireSpaceAfterObjectKeys": true, + "requireCommaBeforeLineBreak": true, + "requireOperatorBeforeLineBreak": [ + "?", + "+", + "-", + "/", + "*", + "=", + "==", + "===", + "!=", + "!==", + ">", + ">=", + "<", + "<=" + ], + "disallowLeftStickedOperators": [ + "?", + "+", + "-", + "/", + "*", + "=", + "==", + "===", + "!=", + "!==", + ">", + ">=", + "<", + "<=" + ], + "requireRightStickedOperators": ["!"], + "disallowRightStickedOperators": [ + "?", + "+", + "/", + "*", + ":", + "=", + "==", + "===", + "!=", + "!==", + ">", + ">=", + "<", + 
"<=" + ], + "requireLeftStickedOperators": [","], + "disallowSpaceAfterPrefixUnaryOperators": ["++", "--", "+", "-", "~", "!"], + "disallowSpaceBeforePostfixUnaryOperators": ["++", "--"], + "requireSpaceBeforeBinaryOperators": [ + "+", + "-", + "/", + "*", + "=", + "==", + "===", + "!=", + "!==" + ], + "requireSpaceAfterBinaryOperators": [ + "+", + "-", + "/", + "*", + "=", + "==", + "===", + "!=", + "!==" + ], + "disallowImplicitTypeConversion": ["numeric", "boolean", "binary", "string"], + "requireCamelCaseOrUpperCaseIdentifiers": true, + "disallowKeywords": ["with"], + "disallowMultipleLineStrings": true, + "disallowMultipleLineBreaks": true, + "validateLineBreaks": "LF", + "validateQuoteMarks": "'", + "disallowMixedSpacesAndTabs": true, + "disallowTrailingWhitespace": true, + "disallowKeywordsOnNewLine": ["else"], + "requireLineFeedAtFileEnd": true, + "maximumLineLength": 78, + "requireCapitalizedConstructors": true, + "safeContextKeyword": "self", + "requireDotNotation": true, + "disallowYodaConditions": true, + "validateJSDoc": { + "checkParamNames": true, + "checkRedundantParams": true, + "requireParamTypes": true + } +} \ No newline at end of file diff --git a/.npmignore b/.npmignore new file mode 100644 index 00000000..072df65a --- /dev/null +++ b/.npmignore @@ -0,0 +1,2 @@ +coverage +.travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..fca8ef01 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,4 @@ +language: node_js +node_js: + - 0.10 + - 0.11 diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..0c06d5bc --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +(The MIT License) + +Copyright (c) 2014 Titus Wormer + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +'Software'), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell 
copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..8d3b09b1 --- /dev/null +++ b/Makefile @@ -0,0 +1,21 @@ +make: lint complexity cover + +test: + @./node_modules/.bin/mocha --reporter spec --check-leaks -u exports spec/*.spec*.js + +watch: + @./node_modules/.bin/mocha --reporter min --check-leaks --watch spec/*.spec*.js + +lint: + # Lint (passes when empty): + @./node_modules/.bin/jshint index.js spec/*.spec*.js + @./node_modules/.bin/jscs ./index.js --reporter=inline + + +complexity: + # Complexity (passes when empty): + @./node_modules/.bin/cr -l --maxcyc 15 --format minimal --silent index.js + +cover: + # Cover (and test): + @./node_modules/.bin/istanbul cover --report html ./node_modules/.bin/_mocha -- -- --reporter min --check-leaks -u exports spec/*.spec*.js diff --git a/Readme.md b/Readme.md new file mode 100644 index 00000000..a224b168 --- /dev/null +++ b/Readme.md @@ -0,0 +1,86 @@ +# retext [![Build Status](https://travis-ci.org/wooorm/retext.png)](https://travis-ci.org/wooorm/retext) + +**retext** is an extensible natural language parser system—by default using [parse-english](https://github.com/wooorm/parse-english) as a parser and [textom](https://github.com/wooorm/textom/) as the object model. 
Provides a plugin-system for analysing and manipulating natural language. In JavaScript. NodeJS, and the browser. Tests provide 100% coverage. + +## Installation + +### With NPM + +```sh +$ npm install retext +``` + +### Git + +```sh +git clone https://github.com/wooorm/retext.git +cd retext +``` + +## Usage + +```js +var Retext = require('retext'), + emoji = require('retext-emoji'), + smartypants = require('retext-smartypants'), + input; + +// Modified first paragraph from: +// http://en.wikipedia.org/wiki/Three_wise_monkeys +input = 'The three wise monkeys [. . .] sometimes called the ' + + 'three mystic apes--are a pictorial maxim. Together ' + + 'they embody the proverbial principle to ("see no evil, ' + + 'hear no evil, speak no evil"). The three monkeys are ' + + 'Mizaru (:see_no_evil:), covering his eyes, who sees no ' + + 'evil; Kikazaru (:hear_no_evil:), covering his ears, ' + + 'who hears no evil; and Iwazaru (:speak_no_evil:), ' + + 'covering his mouth, who speaks no evil.' + +var text = new Retext() + .use(emoji({ + 'convert' : 'encode' + })) + .use(smartypants()) + .parse(input) + .toString(); +// The three wise monkeys […] sometimes called the three +// mystic apes—are a pictorial maxim. Together they +// embody the proverbial principle to (“see no evil, +// hear no evil, speak no evil”). The three monkeys are +// Mizaru (🙈), covering his eyes, who sees no evil; +// Kikazaru (🙉), covering his ears, who hears no evil; +// and Iwazaru (🙊), covering his mouth, who speaks no evil. +``` + +Plugins used: [retext-emoji](https://github.com/wooorm/retext-emoji) and [retext-smartypants](https://github.com/wooorm/retext-smartypants). + +## API + +### Retext(parser) + +Return a new `Retext` instance with the given parser. + +Takes a parser (Object, String, or null), or its name to use. Defaults to `"parse-english"`. When a string, requires the module. + +### Retext.prototype.use(plugin) + +Attaches a plugin. Returns self. 
/**
 * Build a stand-in for a `use` method that is tied to one parsed tree.
 *
 * The returned function forwards its arguments to `use` (with the same
 * `this`). When that call changed the number of entries in
 * `this.plugins`, the plugin is invoked straight away with `rootNode`
 * and `this`; otherwise it is left alone.
 *
 * @param {*} rootNode - passed as the first argument to a newly added
 *                       plugin.
 * @param {Function} use - the function to delegate registration to.
 * @return {Function} - a function taking a plugin and returning `this`.
 * @api private
 */
function useImmediately(rootNode, use) {
    return function (plugin) {
        var self = this,
            countBefore = self.plugins.length;

        use.apply(self, arguments);

        /* Nothing was added by `use`: do not invoke the plugin. */
        if (self.plugins.length === countBefore) {
            return self;
        }

        plugin(rootNode, self);

        return self;
    };
}
/**
 * Define `Retext`. Exported above, and used to instantiate a new
 * `Retext`.
 *
 * Note that, when parser is a string or not given, every entry in the
 * Require cache whose path contains the parser's name as a directory is
 * removed before the module is required. This results in a completely
 * new parser module, and a new TextOM object, thus clearing any changes
 * made to TextOM or the parser and that, for example, the following is
 * true:
 *
 *   !(new Retext().parse() instanceof new Retext().parse().constructor);
 *
 * The following however, is also true:
 *
 *   var retext = new Retext();
 *   retext.parse() instanceof retext.parse().constructor;
 *
 * May be invoked with or without `new`; both return a `Retext` instance.
 *
 * @param {(Function|String)?} parser - the parser, or its name, to use.
 *                                      Defaults to "parse-english".
 * @api public
 * @constructor
 */
function Retext(parser) {
    var self = this;

    /* Without `new`, `this` is not an instance: construct one instead of
     * failing (strict mode) or writing to the global object. */
    if (!(self instanceof Retext)) {
        return new Retext(parser);
    }

    if (!parser) {
        parser = 'parse-english';
    }

    if (typeof parser === 'string') {
        var cache = require.cache,
            needle = '/' + parser + '/';

        Object.keys(cache).forEach(function (filePath) {
            /* Cache keys are file paths; normalise Windows separators so
             * the directory match works on every platform. */
            if (filePath.replace(/\\/g, '/').indexOf(needle) !== -1) {
                delete cache[filePath];
            }
        });

        parser = require(parser);
    }

    self.parser = parser;
    self.plugins = [];
}
/**
 * `Retext#use` takes a plugin—a humble function—and when the parse
 * method of the Retext instance is called, the plugin will be called
 * with the parsed tree, and the retext instance as arguments.
 *
 * A plugin is only registered once; using it again is a no-op.
 *
 * Note that, during the parsing stage, when the `use` method is called
 * by a plugin, the nested plugin is immediately called, before continuing
 * on with its parent plugin.
 *
 * @param {Function} plugin - the plugin to call when parsing.
 * @param {Function?} plugin.attach - called only once with a Retext
 *                                    instance. If you're planning on
 *                                    modifying TextOM or a parser, do it
 *                                    in this method.
 * @return this
 * @throws {TypeError} - when `plugin` is not a function.
 * @api public
 */
Retext.prototype.use = function (plugin) {
    if (typeof plugin !== 'function') {
        throw new TypeError('Illegal invocation: \'' + plugin +
            '\' is not a valid argument for \'Retext.prototype.use\'');
    }

    var self = this,
        plugins = self.plugins;

    if (plugins.indexOf(plugin) === -1) {
        /* `attach` runs before the plugin is stored, so a throwing
         * `attach` leaves the plugin unregistered. */
        if (plugin.attach) {
            plugin.attach(self);
        }

        plugins.push(plugin);
    }

    return self;
};

/**
 * `Retext#parse` takes a source to be given (and parsed) by the parser.
 * Then, `parse` iterates over all plugins, and allows them to modify the
 * TextOM tree created by the parser.
 *
 * Note that, during the parsing stage, when the `use` method is called
 * by a plugin, the nested plugin is immediately called, before continuing
 * on with its parent plugin. The temporary `use` is removed again when
 * parsing ends, also when a plugin throws.
 *
 * @param {(String|Node)?} source - The source to convert.
 * @return {Node} - A RootNode containing the tokenised source.
 * @api public
 */
Retext.prototype.parse = function (source) {
    var self = this,
        parser = self.parser,
        /* Iterate over a copy: plugins added while parsing are invoked
         * by the temporary `use`, not by this loop. */
        plugins = self.plugins.concat(),
        hadOwnUse = Object.prototype.hasOwnProperty.call(self, 'use'),
        use = self.use,
        rootNode = parser(source),
        index,
        plugin;

    self.use = useImmediately(rootNode, use);

    try {
        for (index = 0; index < plugins.length; index++) {
            plugin = plugins[index];
            plugin(rootNode, self);
        }
    } finally {
        /* Always undo the override, so a throwing plugin cannot leave
         * the instance bound to this (now stale) tree. */
        if (hadOwnUse) {
            self.use = use;
        } else {
            delete self.use;
        }
    }

    return rootNode;
};

var Retext = require('..'),
    assert = require('assert');

/* istanbul ignore next: noop */
function noop() {}

/* Specs for the constructor: type, defaults, and cache busting. */
describe('Retext()', function () {
    it('should be of type `function`', function () {
        assert(typeof Retext === 'function');
    });

    it('should return a newly initialized `Retext` object, when invoked without arguments', function () {
        assert(new Retext() instanceof Retext);
    });

    it('should set the `parser` attribute to `parse-english`, when invoked without arguments', function () {
        var retext = new Retext();
        assert('parser' in retext);
        assert(typeof retext.parser === 'function');
        assert('fromAST' in retext.parser);
        assert('TextOM' in retext.parser);
    });

    it('should set the `parser` attribute to the passed in parser, when given', function () {
        var retext = new Retext(noop);
        assert('parser' in retext);
        assert(retext.parser === noop);
    });

    it('should create a new context/parser/textom when required, thus not requiring from memory', function () {
        var rootNode1 = new Retext().parse();
        var rootNode2 = new Retext().parse();

        assert(rootNode1 instanceof rootNode1.constructor);
        assert(!(rootNode1 instanceof rootNode2.constructor));
        assert(rootNode2 instanceof rootNode2.constructor);
        assert(!(rootNode2 instanceof rootNode1.constructor));
    });

    it('should set the `plugins` attribute to an empty array', function () {
        var retext = new Retext();
        assert('plugins' in retext);
        assert(retext.plugins instanceof Array);
        assert(retext.hasOwnProperty('plugins'));
        assert(retext.plugins.length === 0);
    });
});

/* Specs for plugin registration. */
describe('Retext#use', function () {
    it('should be of type `function`', function () {
        assert(typeof Retext.prototype.use === 'function');
        assert(typeof (new Retext()).use === 'function');
    });

    it('should return self', function () {
        var retext = new Retext();
        assert(retext.use(noop) === retext);
    });

    it('should throw, when something other than a function was given', function () {
        var retext = new Retext();
        assert.throws(function () { retext.use(); }, /undefined/);
        assert.throws(function () { retext.use(null); }, /null/);
        assert.throws(function () { retext.use(undefined); }, /undefined/);
        assert.throws(function () { retext.use(true); }, /true/);
        assert.throws(function () { retext.use({}); }, /object Object/);
    });

    it('should attach `use`d plugins', function () {
        var retext = new Retext();
        assert(retext.plugins.length === 0);
        retext.use(noop);
        assert(retext.plugins.length === 1);
    });

    it('should not attach `use`d plugins multiple times', function () {
        var retext = new Retext();
        retext.use(noop);
        assert(retext.plugins.length === 1);
        retext.use(noop);
        assert(retext.plugins.length === 1);
    });
});

/* Specs for parsing and for plugin invocation during parsing. */
describe('Retext#parse', function () {
    it('should be of type `function`', function () {
        assert(typeof Retext.prototype.parse === 'function');
        assert(typeof (new Retext()).parse === 'function');
    });

    it('should return an instance of RootNode', function () {
        var retext = new Retext();
        assert(retext.parse() instanceof retext.parser.TextOM.RootNode);
    });

    it('should immediately call the `attach` method on a plugin, when `use` is called', function () {
        var retext = new Retext(),
            isCalled = false;

        /* istanbul ignore next: noop */
        function plugin () {}

        plugin.attach = function () {
            isCalled = true;
        };

        retext.use(plugin);

        assert(isCalled === true);
    });

    it('should not call the `attach` method, when `parse` is called', function () {
        var retext = new Retext(),
            isCalled = false;

        function plugin () {}

        plugin.attach = function () {
            isCalled = true;
        };

        retext.use(plugin);

        /* Reset: `attach` legitimately ran once during `use`. */
        isCalled = false;

        retext.parse();

        assert(isCalled === false);
    });

    it('should call `use`d plugins, when `parse` is called', function () {
        var retext = new Retext(),
            isCalled = false;

        function plugin () { isCalled = true; }

        retext.use(plugin);

        assert(isCalled === false);

        retext.parse();

        assert(isCalled === true);
    });

    it('should call `use`d plugins with an instance of RootNode and Retext, when `parse` is called', function () {
        var retext = new Retext(),
            args = null,
            tree;

        function plugin () { args = arguments; }

        retext.use(plugin);

        tree = retext.parse();

        /* Fail clearly (instead of on `null[0]`) when never called. */
        assert(args !== null);
        assert(args[0] === tree);
        assert(args[1] === retext);
    });

    it('should immediately call (during parsing) `use`d plugins, with an instance of RootNode and Retext', function () {
        var retext = new Retext(),
            args = null,
            isCalled = false;

        function nestedPlugin () { args = arguments; }

        function plugin (tree, retext) {
            isCalled = true;
            retext.use(nestedPlugin);
            assert(args !== null);
            assert(args[0] === tree);
            assert(args[1] === retext);
        }

        retext.use(plugin).parse();

        /* The assertions above live inside `plugin`; make sure it ran. */
        assert(isCalled === true);
    });

    it('should not call (during parsing) `use`d plugins, when already used', function () {
        var retext = new Retext(),
            nestedCalls = 0,
            isCalled = false;

        function nestedPlugin () { nestedCalls++; }

        function plugin (tree, retext) {
            var length = retext.plugins.length;
            isCalled = true;
            retext.use(nestedPlugin);
            assert(length === retext.plugins.length);
        }

        retext.use(nestedPlugin).use(plugin).parse();

        /* The assertion above lives inside `plugin`; make sure it ran. */
        assert(isCalled === true);

        /* Invoked once by `parse` itself, and not again by the nested
         * `use`. */
        assert(nestedCalls === 1);
    });
});