Skip to content

Commit

Permalink
feat: regex sampler
Browse files Browse the repository at this point in the history
Vendors a tiny dependency (total dependency tree <10 KB
uncompressed).

Other than removing unused code from the dependency, the only change
made is to change the following line:

https://github.com/fent/randexp.js/blob/2b35ea607883fa8cafa63bd3bdb5c12d695f873a/lib/randexp.js#L109

to a separately customizable callback.
  • Loading branch information
llllvvuu committed Aug 26, 2023
1 parent d143da1 commit 43071ce
Show file tree
Hide file tree
Showing 6 changed files with 396 additions and 11 deletions.
52 changes: 51 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
},
"dependencies": {
"@types/json-schema": "^7.0.7",
"json-pointer": "0.6.2"
"json-pointer": "0.6.2",
"randexp": "^0.5.3"
}
}
63 changes: 63 additions & 0 deletions src/samplers/regex.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
const RandExp = require('../vendor/randexp');

const RANDOM_SEARCH_MAX_ITERS = 250;

/**
 * Generates a string that matches a given regex pattern and,
 * if possible, falls within the given length constraints.
 *
 * Strategy:
 *  1. Generate one candidate with every repetition pinned to its lower bound
 *     and return it straight away if its length already fits.
 *  2. Otherwise run a bounded random search (RANDOM_SEARCH_MAX_ITERS
 *     iterations): repetition counts are drawn around a `seed` that is scaled
 *     up after a too-short sample and scaled down after a too-long one. The
 *     first sample that fits is returned.
 *  3. If no sample fits, the sample whose length came closest to the allowed
 *     range is returned.
 *
 * For some advanced regexes we may fail to meet the length constraints exactly
 * (e.g. knapsack or "chicken McNugget" style problems).
 *
 * @param {string} pattern - Regex source to generate a sample for.
 * @param {number} [minLength=0] - Smallest acceptable sample length.
 * @param {number} [maxLength=Infinity] - Largest acceptable sample length.
 * @return {string} A generated sample; best effort with respect to length.
 */
export function sampleRegex(pattern, minLength = 0, maxLength = Infinity) {
  const randexp = new RandExp(pattern);

  // NOTE(review): the repetition hook is installed on the shared prototype, so
  // it stays in effect for every RandExp instance after this call returns.
  RandExp.prototype.randIntRepetition = (from) => from;
  const initialSample = randexp.gen();
  // Compare the sample's *length* with both bounds. Comparing the string
  // itself with a number coerces it to NaN for almost every sample, which
  // made this fast path unreachable.
  if (initialSample.length >= minLength && initialSample.length <= maxLength) {
    return initialSample;
  }

  // Add on average this many characters to each repetition
  let seed = 1;
  // Adjust the seed by this much when result is too low or too high.
  let learningRate = 2;

  RandExp.prototype.randIntRepetition = (from, to) => {
    // `roll` is uniform in [0, 2 * seed), i.e. it averages `seed`.
    const roll = seed * Math.random() * 2;
    const remainder = roll % 1;
    // Stochastic rounding: round up with probability equal to the fraction.
    const intRoll = Math.floor(roll) + (Math.random() < remainder ? 1 : 0);
    // Wrap the roll into the inclusive range [from, to].
    return from + (intRoll % (to - from + 1));
  };

  let distance = Infinity; // how much the best generation is too low or high by
  let best;
  for (let i = 0; i < RANDOM_SEARCH_MAX_ITERS; i++) {
    const sample = randexp.gen();

    if (sample.length < minLength) {
      seed *= learningRate;
      if (minLength - sample.length < distance) {
        distance = minLength - sample.length;
        best = sample;
      }
    } else if (sample.length > maxLength) {
      seed /= learningRate;
      if (sample.length - maxLength < distance) {
        distance = sample.length - maxLength;
        best = sample;
      }
    } else {
      return sample;
    }

    // Shrink the adjustment step a little on every iteration.
    learningRate -= 1 / RANDOM_SEARCH_MAX_ITERS;
  }

  return best;
}
32 changes: 23 additions & 9 deletions src/samplers/string.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
'use strict';

import { ensureMinLength, toRFCDateTime, uuid } from '../utils';
import { sampleRegex } from './regex';

const passwordSymbols = 'qwerty!@#$%^123456';

/**
 * Pads `str` up to `min` characters via `ensureMinLength`, then cuts the
 * result down to `max` characters when a truthy `max` is given.
 *
 * @param {string} str - Base string.
 * @param {number} min - Minimum length handed to `ensureMinLength`.
 * @param {number} [max] - Maximum length; a falsy value means "no upper limit".
 * @return {string}
 */
function truncateString(str, min, max) {
  const padded = ensureMinLength(str, min);
  const exceedsMax = Boolean(max) && padded.length > max;
  return exceedsMax ? padded.substring(0, max) : padded;
}

/**
 * Returns a fixed sample value for the `email` string format.
 *
 * @return {string} A syntactically valid example e-mail address.
 */
function emailSample() {
  // The previous literal was an e-mail-obfuscation placeholder, not an address.
  return 'user@example.com';
}
Expand Down Expand Up @@ -42,12 +51,10 @@ function timeSample(min, max) {
return commonDateTimeSample({ min, max, omitTime: false, omitDate: true }).slice(1);
}

/**
 * Sampler used when a string schema has no (known) format.
 *
 * When a pattern is supplied, the sample is generated from it with
 * `sampleRegex`; otherwise the literal 'string' is fitted to the length
 * bounds with `truncateString`.
 *
 * @param {number} min - Minimum length.
 * @param {number} [max] - Maximum length.
 * @param {string} [_propertyName] - Unused; kept so all samplers share one signature.
 * @param {string} [pattern] - Regex source the sample should match.
 * @return {string}
 */
function defaultSample(min, max, _propertyName, pattern) {
  return pattern
    ? sampleRegex(pattern, min, max)
    : truncateString('string', min, max);
}

function ipv4Sample() {
Expand Down Expand Up @@ -96,8 +103,10 @@ function relativeJsonPointerSample() {
return '1/relative/json/pointer';
}

/**
 * Sampler for a string schema's regex format.
 *
 * When a pattern is supplied, the sample is generated from it with
 * `sampleRegex`; otherwise the literal '/regex/' is fitted to the length
 * bounds with `truncateString`.
 *
 * @param {number} min - Minimum length.
 * @param {number} [max] - Maximum length.
 * @param {string} [_propertyName] - Unused; kept so all samplers share one signature.
 * @param {string} [pattern] - Regex source the sample should match.
 * @return {string}
 */
function regexSample(min, max, _propertyName, pattern) {
  return pattern
    ? sampleRegex(pattern, min, max)
    : truncateString('/regex/', min, max);
}

const stringFormats = {
Expand Down Expand Up @@ -127,5 +136,10 @@ export function sampleString(schema, options, spec, context) {
let format = schema.format || 'default';
let sampler = stringFormats[format] || defaultSample;
let propertyName = context && context.propertyName;
return sampler(schema.minLength | 0, schema.maxLength, propertyName);
return sampler(
schema.minLength || 0,
schema.maxLength,
propertyName,
schema.pattern,
);
}
Loading

0 comments on commit 43071ce

Please sign in to comment.