Skip to content

Commit

Permalink
fix comment-parser #184
Browse files Browse the repository at this point in the history
  • Loading branch information
spencermountain committed Sep 24, 2018
1 parent 0d0f506 commit 6100746
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 25 deletions.
34 changes: 11 additions & 23 deletions scratch.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,24 @@ const readFile = require('./tests/lib/_cachedPage');
// const wtf = require('./builds/wtf_wikipedia');
// const wtf = require('./build');

// (async () => {
// let docs = await wtf.fetch(['June', 'July'], 'en');
// console.log(docs);
// })();
(async () => {
// #1 - SK Koeban Krasnodar
// #3 - Vleitjagra
// #4 - Indiese gelowe
// let doc = await wtf.fetch('SK Koeban Krasnodar', 'af');
// console.log(doc.templates());
})();


// let doc = readFile('Mark-Behr');
// console.log(doc.sections().map(s => s.title())); //'publikasies'
// console.log(doc.infobox(0).data);


// let str = `
// '''Park Place''' may refer to:
// {{TOC right}}
//
// == Media ==
// * [[Park Place (TV series)|Park Place]], a 1981 CBS sitcom
//
// == Places ==
//
// === Canada ===
// * [[Park Place (Ontario)]], a park in the city of Barrie
// * [[Park Place (Vancouver)]], a skyscraper
// * [[Park Place Mall]], Lethbridge, Alberta
// {{__throw-wtf-error}}
// {{disambiguation}}
// `;
// console.log(wtf(str).links());
let str = `hello <!-- <ref>blah blah</ref> --> world`;
console.log(wtf(str).text());

let str = `== Toekennings<ref>Stellenbosch Writers: http://www.stellenboschwriters.com/behrm.html</ref> ==`;
console.log(wtf(str).sections().map(s => s.title()));
// let str = `== Toekennings<ref>Stellenbosch Writers: http://www.stellenboschwriters.com/behrm.html</ref> ==`;
// console.log(wtf(str).sections().map(s => s.title()));
// console.log(wtf(str).references())
// let str = `{{Nihongo|'''Toyota Motor Corporation'''|トヨタ自動車株式会社|Toyota Jidōsha [[Kabushiki gaisha|KK]]|{{IPA-ja|toꜜjota|IPA}}, {{IPAc-en|lang|t|ɔɪ|ˈ|oʊ|t|ə}}|lead=yes}}, usually shortened to '''Toyota''', is a Japanese [[Multinational corporation|multinational]] [[Automotive industry|automotive]] manufacturer headquartered in [[Toyota, Aichi]], Japan. <!--Cited below-->`;
4 changes: 2 additions & 2 deletions src/01-document/preProcess/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ const kill_xml = require('./kill_xml');
//this mostly-formatting stuff can be cleaned-up first, to make life easier
function preProcess(r, wiki, options) {
//remove comments
wiki = wiki.replace(/<!--[^>]{0,2000}-->/g, '');
wiki = wiki.replace(/<!--[\s\S]{0,2000}-->/g, '');
wiki = wiki.replace(/__(NOTOC|NOEDITSECTION|FORCETOC|TOC)__/ig, '');
//signatures
wiki = wiki.replace(/~~{1,3}/g, '');
//windows newlines
wiki = wiki.replace(/\r/g, '');
//horizontal rule
wiki = wiki.replace(/--{1,3}/g, '');
wiki = wiki.replace(/----/g, '');
//{{!}} - this weird thing https://www.mediawiki.org/wiki/Help:Magic_words#Other
wiki = wiki.replace(/\{\{!\}\}/g, '|');
//formatting for templates-in-templates...
Expand Down
22 changes: 22 additions & 0 deletions tests/comments.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
'use strict';
var test = require('tape');
var wtf = require('./lib');

// Regression cases for stripping HTML comments (`<!-- ... -->`) from wikitext
// before the rest of the document is parsed.
test('tricky comments', t => {
  // the comment body contains markup of its own; the <ref> inside it must
  // disappear along with the comment and not be reported as a reference
  let str = `hello <!-- <ref>blah blah</ref> --> world`;
  let doc = wtf(str);
  t.equal(doc.text(), 'hello world', 'with brackets');
  t.equal(doc.references().length, 0, 'found no references');

  // a comment that spans several lines
  str = `hello <!-- not this
or this
--> world`;
  t.equal(wtf(str).text(), 'hello world', 'newlines');

  // an opener with no closing `-->` is not a comment; the text is expected
  // to come back unchanged
  str = `hello <!-- world`;
  t.equal(wtf(str).text(), 'hello <!-- world', 'incomplete comment');

  // a comment with an empty body
  str = `hello <!----> world`;
  t.equal(wtf(str).text(), 'hello world', 'empty comment');
  t.end();
});

0 comments on commit 6100746

Please sign in to comment.