From 610074645b0eb5dda535637bf4bb70ac1c87f6f4 Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Mon, 24 Sep 2018 12:43:08 -0400 Subject: [PATCH] fix comment-parser #184 --- scratch.js | 34 ++++++++++------------------- src/01-document/preProcess/index.js | 4 ++-- tests/comments.test.js | 22 +++++++++++++++++++ 3 files changed, 35 insertions(+), 25 deletions(-) create mode 100644 tests/comments.test.js diff --git a/scratch.js b/scratch.js index 2092df30..876c097e 100644 --- a/scratch.js +++ b/scratch.js @@ -3,10 +3,13 @@ const readFile = require('./tests/lib/_cachedPage'); // const wtf = require('./builds/wtf_wikipedia'); // const wtf = require('./build'); -// (async () => { -// let docs = await wtf.fetch(['June', 'July'], 'en'); -// console.log(docs); -// })(); +(async () => { + // #1 - SK Koeban Krasnodar + // #3 - Vleitjagra + // #4 - Indiese gelowe + // let doc = await wtf.fetch('SK Koeban Krasnodar', 'af'); + // console.log(doc.templates()); +})(); // let doc = readFile('Mark-Behr'); @@ -14,25 +17,10 @@ const readFile = require('./tests/lib/_cachedPage'); // console.log(doc.infobox(0).data); -// let str = ` -// '''Park Place''' may refer to: -// {{TOC right}} -// -// == Media == -// * [[Park Place (TV series)|Park Place]], a 1981 CBS sitcom -// -// == Places == -// -// === Canada === -// * [[Park Place (Ontario)]], a park in the city of Barrie -// * [[Park Place (Vancouver)]], a skyscraper -// * [[Park Place Mall]], Lethbridge, Alberta -// {{__throw-wtf-error}} -// {{disambiguation}} -// `; -// console.log(wtf(str).links()); +let str = `hello world`; +console.log(wtf(str).text()); -let str = `== ToekenningsStellenbosch Writers: http://www.stellenboschwriters.com/behrm.html ==`; -console.log(wtf(str).sections().map(s => s.title())); +// let str = `== ToekenningsStellenbosch Writers: http://www.stellenboschwriters.com/behrm.html ==`; +// console.log(wtf(str).sections().map(s => s.title())); // console.log(wtf(str).references()) // let str = `{{Nihongo|'''Toyota Motor Corporation'''|トヨタ自動車株式会社|Toyota Jidōsha [[Kabushiki gaisha|KK]]|{{IPA-ja|toꜜjota|IPA}}, {{IPAc-en|lang|t|ɔɪ|ˈ|oʊ|t|ə}}|lead=yes}}, usually shortened to '''Toyota''', is a Japanese [[Multinational corporation|multinational]] [[Automotive industry|automotive]] manufacturer headquartered in [[Toyota, Aichi]], Japan. `; diff --git a/src/01-document/preProcess/index.js b/src/01-document/preProcess/index.js index 0996e5c1..800c8224 100644 --- a/src/01-document/preProcess/index.js +++ b/src/01-document/preProcess/index.js @@ -3,14 +3,14 @@ const kill_xml = require('./kill_xml'); //this mostly-formatting stuff can be cleaned-up first, to make life easier function preProcess(r, wiki, options) { //remove comments - wiki = wiki.replace(//g, ''); + wiki = wiki.replace(//g, ''); wiki = wiki.replace(/__(NOTOC|NOEDITSECTION|FORCETOC|TOC)__/ig, ''); //signitures wiki = wiki.replace(/~~{1,3}/g, ''); //windows newlines wiki = wiki.replace(/\r/g, ''); //horizontal rule - wiki = wiki.replace(/--{1,3}/g, ''); + wiki = wiki.replace(/----/g, ''); //{{!}} - this weird thing https://www.mediawiki.org/wiki/Help:Magic_words#Other wiki = wiki.replace(/\{\{!\}\}/g, '|'); //formatting for templates-in-templates... diff --git a/tests/comments.test.js b/tests/comments.test.js new file mode 100644 index 00000000..e6c57763 --- /dev/null +++ b/tests/comments.test.js @@ -0,0 +1,22 @@ +'use strict'; +var test = require('tape'); +var wtf = require('./lib'); + +test('tricky comments', t => { + let str = `hello world`; + let doc = wtf(str); + t.equal(doc.text(), 'hello world', 'with brackets'); + t.equal(doc.references().length, 0, 'found no references'); + + str = `hello world`; + t.equal(wtf(str).text(), 'hello world', 'newlines'); + + str = `hello world`; + t.equal(wtf(str).text(), 'hello world', 'empty reference'); + t.end(); +});