From 8fcb1ff4874d1891791280d63125d27ed29b58a8 Mon Sep 17 00:00:00 2001 From: wooorm Date: Wed, 4 Jun 2014 00:13:28 +0200 Subject: [PATCH] 0.0.2 --- .gitignore | 3 + .jscs.json | 145 +++++++++++++++++++++++++++++++++ .npmignore | 2 + .travis.yml | 4 + LICENSE | 22 +++++ Makefile | 21 +++++ Readme.md | 86 ++++++++++++++++++++ index.js | 139 ++++++++++++++++++++++++++++++++ package.json | 33 ++++++++ spec/retext.spec.js | 192 ++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 647 insertions(+) create mode 100644 .gitignore create mode 100644 .jscs.json create mode 100644 .npmignore create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 Readme.md create mode 100644 index.js create mode 100644 package.json create mode 100755 spec/retext.spec.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..f2a529eb --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +node_modules +.DS_Store +coverage \ No newline at end of file diff --git a/.jscs.json b/.jscs.json new file mode 100644 index 00000000..49a7ad71 --- /dev/null +++ b/.jscs.json @@ -0,0 +1,145 @@ +{ + "requireCurlyBraces": [ + "if", + "else", + "for", + "while", + "do", + "try", + "catch" + ], + "requireSpaceAfterKeywords": [ + "if", + "else", + "for", + "while", + "do", + "switch", + "return", + "try", + "catch" + ], + "requireSpaceBeforeBlockStatements": true, + "requireParenthesesAroundIIFE": true, + "requireSpacesInConditionalExpression": true, + "requireSpacesInFunctionExpression": { + "beforeOpeningCurlyBrace": true + }, + "requireSpacesInAnonymousFunctionExpression": { + "beforeOpeningRoundBrace": true, + "beforeOpeningCurlyBrace": true + }, + "requireSpacesInNamedFunctionExpression": { + "beforeOpeningRoundBrace": true, + "beforeOpeningCurlyBrace": true + }, + "requireSpacesInFunctionExpression": { + "beforeOpeningCurlyBrace": true + }, + "requireMultipleVarDecl": true, + "requireBlocksOnNewline": true, + 
"disallowPaddingNewlinesInBlocks": true, + "disallowEmptyBlocks": true, + "disallowSpacesInsideObjectBrackets": true, + "disallowSpacesInsideArrayBrackets": true, + "disallowSpacesInsideParentheses": true, + "requireSpacesInsideObjectBrackets": "all", + "disallowDanglingUnderscores": true, + "requireSpaceAfterObjectKeys": true, + "requireCommaBeforeLineBreak": true, + "requireOperatorBeforeLineBreak": [ + "?", + "+", + "-", + "/", + "*", + "=", + "==", + "===", + "!=", + "!==", + ">", + ">=", + "<", + "<=" + ], + "disallowLeftStickedOperators": [ + "?", + "+", + "-", + "/", + "*", + "=", + "==", + "===", + "!=", + "!==", + ">", + ">=", + "<", + "<=" + ], + "requireRightStickedOperators": ["!"], + "disallowRightStickedOperators": [ + "?", + "+", + "/", + "*", + ":", + "=", + "==", + "===", + "!=", + "!==", + ">", + ">=", + "<", + "<=" + ], + "requireLeftStickedOperators": [","], + "disallowSpaceAfterPrefixUnaryOperators": ["++", "--", "+", "-", "~", "!"], + "disallowSpaceBeforePostfixUnaryOperators": ["++", "--"], + "requireSpaceBeforeBinaryOperators": [ + "+", + "-", + "/", + "*", + "=", + "==", + "===", + "!=", + "!==" + ], + "requireSpaceAfterBinaryOperators": [ + "+", + "-", + "/", + "*", + "=", + "==", + "===", + "!=", + "!==" + ], + "disallowImplicitTypeConversion": ["numeric", "boolean", "binary", "string"], + "requireCamelCaseOrUpperCaseIdentifiers": true, + "disallowKeywords": ["with"], + "disallowMultipleLineStrings": true, + "disallowMultipleLineBreaks": true, + "validateLineBreaks": "LF", + "validateQuoteMarks": "'", + "disallowMixedSpacesAndTabs": true, + "disallowTrailingWhitespace": true, + "disallowKeywordsOnNewLine": ["else"], + "requireLineFeedAtFileEnd": true, + "maximumLineLength": 78, + "requireCapitalizedConstructors": true, + "safeContextKeyword": "self", + "requireDotNotation": true, + "disallowYodaConditions": true, + "validateJSDoc": { + "checkParamNames": true, + "checkRedundantParams": true, + "requireParamTypes": true + } +} \ No newline 
at end of file diff --git a/.npmignore b/.npmignore new file mode 100644 index 00000000..072df65a --- /dev/null +++ b/.npmignore @@ -0,0 +1,2 @@ +coverage +.travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..fca8ef01 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,4 @@ +language: node_js +node_js: + - 0.10 + - 0.11 diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..0c06d5bc --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +(The MIT License) + +Copyright (c) 2014 Titus Wormer + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +'Software'), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..8d3b09b1 --- /dev/null +++ b/Makefile @@ -0,0 +1,21 @@ +make: lint complexity cover + +test: + @./node_modules/.bin/mocha --reporter spec --check-leaks -u exports spec/*.spec*.js + +watch: + @./node_modules/.bin/mocha --reporter min --check-leaks --watch spec/*.spec*.js + +lint: + # Lint (passes when empty): + @./node_modules/.bin/jshint index.js spec/*.spec*.js + @./node_modules/.bin/jscs ./index.js --reporter=inline + + +complexity: + # Complexity (passes when empty): + @./node_modules/.bin/cr -l --maxcyc 15 --format minimal --silent index.js + +cover: + # Cover (and test): + @./node_modules/.bin/istanbul cover --report html ./node_modules/.bin/_mocha -- -- --reporter min --check-leaks -u exports spec/*.spec*.js diff --git a/Readme.md b/Readme.md new file mode 100644 index 00000000..a224b168 --- /dev/null +++ b/Readme.md @@ -0,0 +1,86 @@ +# retext [![Build Status](https://travis-ci.org/wooorm/retext.png)](https://travis-ci.org/wooorm/retext) + +**retext** is an extensible natural language parser system—by default using [parse-english](https://github.com/wooorm/parse-english) as a parser and [textom](https://github.com/wooorm/textom/) as the object model. Provides a plugin-system for analysing and manipulating natural language. In JavaScript. NodeJS, and the browser. Tests provide 100% coverage. + +## Installation + +### With NPM + +```sh +$ npm install retext +``` + +### Git + +```sh +git clone https://github.com/wooorm/retext.git +cd retext +``` + +## Usage + +```js +var Retext = require('retext'), + emoji = require('retext-emoji'), + smartypants = require('retext-smartypants'), + input; + +// Modified first paragraph from: +// http://en.wikipedia.org/wiki/Three_wise_monkeys +input = 'The three wise monkeys [. . .] sometimes called the ' + + 'three mystic apes--are a pictorial maxim. Together ' + + 'they embody the proverbial principle to ("see no evil, ' + + 'hear no evil, speak no evil").
The three monkeys are ' + + 'Mizaru (:see_no_evil:), covering his eyes, who sees no ' + + 'evil; Kikazaru (:hear_no_evil:), covering his ears, ' + + 'who hears no evil; and Iwazaru (:speak_no_evil:), ' + + 'covering his mouth, who speaks no evil.' + +var text = new Retext() + .use(emoji({ + 'convert' : 'encode' + })) + .use(smartypants()) + .parse(input) + .toString(); +// The three wise monkeys […] sometimes called the three +// mystic apes—are a pictorial maxim. Together they +// embody the proverbial principle to (“see no evil, +// hear no evil, speak no evil”). The three monkeys are +// Mizaru (🙈), covering his eyes, who sees no evil; +// Kikazaru (🙉), covering his ears, who hears no evil; +// and Iwazaru (🙊), covering his mouth, who speaks no evil. +``` + +Plugins used: [retext-emoji](https://github.com/wooorm/retext-emoji) and [retext-smartypants](https://github.com/wooorm/retext-smartypants). + +## API + +### Retext(parser) + +Return a new `Retext` instance with the given parser. + +Takes a parser (Object, String, or null), or its name to use. Defaults to `"parse-english"`. When a string, requires the module. + +### Retext.prototype.use(plugin) + +Attaches a plugin. Returns self. + +Takes a plugin—a humble function—and when the `parse` method of the Retext instance is called, the plugin will be called with the parsed tree, and the Retext instance as arguments. Plugins can also have an `attach` method, which will only be called once (when the plugin is `use`d). + +### Retext.prototype.parse(source) + +Parses the given source (using the parser given to the constructor), and returns the tree, as modified by the `use`d plugins. + +Note that, during the parsing stage, when the `use` method is called by a plugin, the nested plugin is immediately called, before continuing on with its parent plugin—this enables plugins to depend on other plugins. + +Returns a RootNode.
+ +## Related + + * [parse-english](https://github.com/wooorm/parse-english "Parse English") + * [textom](https://github.com/wooorm/textom "TextOM") + +## License + + MIT diff --git a/index.js b/index.js new file mode 100644 index 00000000..4610f4de --- /dev/null +++ b/index.js @@ -0,0 +1,139 @@ +/* jshint -W084, -W093 */ +(function () { + /** + * Expose `Retext`. Defined below, and used to instantiate a new + * Retext object. + */ + exports = module.exports = Retext; + + function useImmediately(rootNode, use) { + return function (plugin) { + var self = this, + length = self.plugins.length; + + use.apply(self, arguments); + + if (length !== self.plugins.length) { + plugin(rootNode, self); + } + + return self; + }; + } + + /** + * Define `Retext`. Exported above, and used to instantiate a new + * `Retext`. + * + * Note that, when parser is a string or not given, the to-require module + * is first removed from the Require cache. This results in a completely + * new parser module, and a new TextOM object, thus clearing any changes + * made to TextOM or the parser, so that, for example, the following is + * true: + * + * !(new Retext().parse() instanceof new Retext().parse().constructor); + * + * The following however, is also true: + * + * var retext = new Retext(); + * retext.parse() instanceof retext.parse().constructor; + * + * + * @param {(Object|String)?} parser - the parser, or its name, to use. + * Defaults to "parse-english".
+ * @api public + * @constructor + */ + function Retext(parser) { + var self = this; + + if (!parser) { + parser = 'parse-english'; + } + + if (typeof parser === 'string') { + var cache = require.cache, attribute; + + for (attribute in cache) { + if (attribute.indexOf('/' + parser + '/') !== -1) { + delete cache[attribute]; + } + } + + parser = require(parser); + } + + self.parser = parser; + self.plugins = []; + } + + /** + * `Retext#use` takes a plugin—a humble function—and when the parse + * method of the Retext instance is called, the plugin will be called + * with the parsed tree, and the retext instance as arguments. + * + * Note that, during the parsing stage, when the `use` method is called + * by a plugin, the nested plugin is immediately called, before continuing + * on with its parent plugin. + * + * @param {Function} plugin - the plugin to call when parsing. + * @param {Function?} plugin.attach - called only once with a Retext + * instance. If you're planning on + * modifying TextOM or a parser, do it + * in this method. + * @return this + * @api public + */ + Retext.prototype.use = function (plugin) { + if (typeof plugin !== 'function') { + throw new TypeError('Illegal invocation: \'' + plugin + + '\' is not a valid argument for \'Retext.prototype.use\''); + } + + var self = this, + plugins = self.plugins; + + if (plugins.indexOf(plugin) === -1) { + if (plugin.attach) { + plugin.attach(self); + } + + plugins.push(plugin); + } + + return self; + }; + + /** + * `Retext#parse` takes a source to be given (and parsed) by the parser. + * Then, `parse` iterates over all plugins, and allows them to modify the + * TextOM tree created by the parser. + * + * Note that, during the parsing stage, when the `use` method is called + * by a plugin, the nested plugin is immediately called, before continuing + * on with its parent plugin. + * + * @param {(String|Node)?} source - The source to convert. + * @return {Node} - A RootNode containing the tokenised source.
+ * @api public + */ + Retext.prototype.parse = function (source) { + var self = this, + parser = self.parser, + plugins = self.plugins.concat(), + iterator = -1, + use = self.use, + rootNode = parser(source), + plugin; + + self.use = useImmediately(rootNode, use); + + while (plugin = plugins[++iterator]) { + plugin(rootNode, this); + } + + self.use = use; + + return rootNode; + }; +})(); diff --git a/package.json b/package.json new file mode 100644 index 00000000..964b4426 --- /dev/null +++ b/package.json @@ -0,0 +1,33 @@ +{ + "name": "retext", + "version": "0.0.2", + "description": "Extensible natural language system for analysing and manipulating natural language", + "keywords": [ + "natural", + "language", + "parser", + "analyse", + "manipule", + "parse-english", + "textom" + ], + "author": "Titus Wormer ", + "license": "MIT", + "dependencies": { + "parse-english": "~0.0.8" + }, + "devDependencies": { + "complexity-report": "~1.0.3", + "istanbul": "~0.2.10", + "jscs": "^1.4.5", + "jshint": "~2.5.1", + "mocha": "~1.20.0" + }, + "repository": { + "type": "git", + "url": "https://github.com/wooorm/retext.git" + }, + "scripts": { + "test": "make test" + } +} diff --git a/spec/retext.spec.js b/spec/retext.spec.js new file mode 100755 index 00000000..d6960497 --- /dev/null +++ b/spec/retext.spec.js @@ -0,0 +1,192 @@ + +var Retext = require('..'), + assert = require('assert'); + +/* istanbul ignore next: noop */ +function noop() {} + +describe('Retext()', function () { + it('should be of type `function`', function () { + assert(typeof Retext === 'function'); + }); + + it('should return a newly initialized `Retext` object, when invoked without arguments', function () { + assert(new Retext() instanceof Retext); + }); + + it('should set the `parser` attribute to `parse-english`, when invoked without arguments', function () { + var retext = new Retext(); + assert('parser' in retext); + assert(typeof retext.parser === 'function'); + assert('fromAST' in retext.parser); + 
assert('TextOM' in retext.parser); + }); + + it('should set the `parser` attribute to the passed in parser, when given', function () { + var retext = new Retext(noop); + assert('parser' in retext); + assert(retext.parser === noop); + }); + + it('should create a new context/parser/textom when required, thus not requiring from memory', function () { + var rootNode1 = new Retext().parse(); + var rootNode2 = new Retext().parse(); + + assert(rootNode1 instanceof rootNode1.constructor); + assert(!(rootNode1 instanceof rootNode2.constructor)); + assert(rootNode2 instanceof rootNode2.constructor); + assert(!(rootNode2 instanceof rootNode1.constructor)); + }); + + it('should set the `plugins` attribute to an empty array', function () { + var retext = new Retext(); + assert('plugins' in retext); + assert(retext.plugins instanceof Array); + assert(retext.hasOwnProperty('plugins')); + assert(retext.plugins.length === 0); + }); +}); + +describe('Retext#use', function () { + it('should be of type `function`', function () { + assert(typeof Retext.prototype.use === 'function'); + assert(typeof (new Retext()).use === 'function'); + }); + + it('should return self', function () { + var retext = new Retext(); + assert(retext.use(noop) === retext); + }); + + it('should throw, when something other than a function was given', function () { + var retext = new Retext(); + assert.throws(function () { retext.use(); }, /undefined/); + assert.throws(function () { retext.use(null); }, /null/); + assert.throws(function () { retext.use(undefined); }, /undefined/); + assert.throws(function () { retext.use(true); }, /true/); + assert.throws(function () { retext.use({}); }, /object Object/); + }); + + it('should attach `use`d plugins', function () { + var retext = new Retext(); + assert(retext.plugins.length === 0); + retext.use(noop); + assert(retext.plugins.length === 1); + }); + + it('should not attach `use`d plugins multiple times', function () { + var retext = new Retext(); + retext.use(noop); 
+ assert(retext.plugins.length === 1); + retext.use(noop); + assert(retext.plugins.length === 1); + }); +}); + +describe('Retext#parse', function () { + it('should be of type `function`', function () { + assert(typeof Retext.prototype.parse === 'function'); + assert(typeof (new Retext()).parse === 'function'); + }); + + it('should return an instance of RootNode', function () { + var retext = new Retext(); + assert(retext.parse() instanceof retext.parser.TextOM.RootNode); + }); + + it('should immediately call the `attach` method on a plugin, when `use` is called', function () { + var retext = new Retext(), + isCalled = false; + + /* istanbul ignore next: noop */ + function plugin () {} + + plugin.attach = function () { + isCalled = true; + }; + + retext.use(plugin); + + assert(isCalled === true); + }); + + it('should not call the `attach` method, when `parse` is called', function () { + var retext = new Retext(), + isCalled = false; + + function plugin () {} + + plugin.attach = function () { + isCalled = true; + }; + + retext.use(plugin); + + isCalled = false; + + retext.parse(); + + assert(isCalled === false); + }); + + it('should call `use`d plugins, when `parse` is called', function () { + var retext = new Retext(), + isCalled = false; + + function plugin () { isCalled = true; } + + retext.use(plugin); + + assert(isCalled === false); + + retext.parse(); + + assert(isCalled === true); + }); + + it('should call `use`d plugins with an instance of RootNode and Retext, when `parse` is called', function () { + var retext = new Retext(), + args = null, + isCalled = false, + tree; + + function plugin () { args = arguments; } + + retext.use(plugin); + + tree = retext.parse(); + + assert(args[0] === tree); + assert(args[1] === retext); + }); + + it('should immediately call (during parsing) `use`d plugins, with an instance of RootNode and Retext', function () { + var retext = new Retext(), + args = null; + + function nestedPlugin () { args = arguments; } + + function plugin 
(tree, retext) { + retext.use(nestedPlugin); + assert(args[0] === tree); + assert(args[1] === retext); + } + + retext.use(plugin).parse(); + }); + + it('should not call (during parsing) `use`d plugins, when already used', function () { + var retext = new Retext(); + + function nestedPlugin () {} + + function plugin (tree, retext) { + var length = retext.plugins.length; + retext.use(nestedPlugin); + assert(length === retext.plugins.length); + } + + retext.use(nestedPlugin).use(plugin).parse(); + }); +}); +