Skip to content

Commit

Permalink
feat: regex sampler
Browse files Browse the repository at this point in the history
This fixes Redocly#152 except in an edge case where both `pattern` and
`maxLength` are used, and the sampler skips over the valid length range.

It introduces a tiny dependency (<10 KB uncompressed) which doesn't
really have any prominent competitors.
  • Loading branch information
llllvvuu committed Aug 23, 2023
1 parent d143da1 commit 64230d4
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 11 deletions.
52 changes: 51 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
},
"dependencies": {
"@types/json-schema": "^7.0.7",
"json-pointer": "0.6.2"
"json-pointer": "0.6.2",
"randexp": "^0.5.3"
}
}
85 changes: 76 additions & 9 deletions src/samplers/string.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,70 @@
'use strict';

import RandExp from 'randexp';

import { ensureMinLength, toRFCDateTime, uuid } from '../utils';

const passwordSymbols = 'qwerty!@#$%^123456';
const MAX_REGEX_SAMPLES = 100;

/**
 * Generates a string matching `pattern`, trying to keep its length within
 * the inclusive range [min, max].
 *
 * RandExp draws every random choice through `randInt(from, to)`. This
 * function replaces that hook with clamped, mostly deterministic variants so
 * that successive samples grow step by step.
 *
 * If no sample of a suitable length is found, the returned string may be
 * longer than `max` (or, after MAX_REGEX_SAMPLES attempts, shorter than
 * `min`): a string that fits the regex is preferred over one that fits the
 * length bounds.
 *
 * @param {string|RegExp} pattern - Regular expression handed to `new RandExp()`.
 * @param {number} min - Minimum desired length.
 * @param {number} [max] - Maximum desired length; a falsy value means "no maximum".
 * @returns {string} A generated sample.
 */
function sampleRegex(pattern, min, max) {
  // Override `randInt` on the instance instead of on `RandExp.prototype`, so
  // the clamped generator never leaks into other users of RandExp.
  const generate = (randInt) => {
    const generator = new RandExp(pattern);
    generator.randInt = randInt;
    return generator.gen();
  };

  let res;
  let i = 0;
  let length;
  let prevLength;

  // Increase length of the sample until it satisfies the minimum.
  do {
    const step = i;
    res = generate((from, to) => Math.min(from + step, to));
    prevLength = length;
    length = res.length;
    i++;
  } while (length < min && i < MAX_REGEX_SAMPLES);

  const wentPastMax = max && max >= min && length > max;
  if (!wentPastMax) {
    return res;
  }

  // Only one sample was drawn, and every `randInt` call returned its lower
  // bound. There is no shorter previous step to interpolate with, and fuzzing
  // would just regenerate this same string, so return it right away.
  if (prevLength === undefined) {
    return res;
  }

  // Handle case where we went past the maximum.
  // Example: /\d*\d*foo/, will sample foo, 11foo, 2222foo, etc.
  //
  // HACK: RandExp doesn't expose an API to set the value of a specific sample,
  // so we'll just fuzz it. If no satisfying string is found,
  // we prefer to return a string that is too long than to return a string
  // that doesn't fit the regex.
  //
  // Let N be the number of * or + in the regex.
  // The probability that N coin flips with heads-probability k/N come up
  // exactly k heads is ~ sqrt(N / (2 * pi * k * (N - k))) by Stirling's
  // approximation. The worst case is ~ sqrt(2 / (pi * N)), at k = N/2,
  // so if N < 63 then we can hit an exact length with probability > 0.1,
  // which means that with 100 samples we can hit an exact match
  // with probability > 0.99997.
  const targetProbability = ((min + max) / 2 - prevLength) / (length - prevLength);

  // Each draw randomly picks between the previous step (i - 2) and the
  // overshooting step (i - 1), never going below `from`.
  const previousStep = i - 2;
  const fuzzedRandInt = (from, to) => Math.max(
    from,
    Math.min(from + previousStep + (Math.random() < targetProbability ? 1 : 0), to),
  );

  for (let j = 0; j < MAX_REGEX_SAMPLES; j++) {
    const candidate = generate(fuzzedRandInt);
    if (candidate.length < min) {
      continue;
    }
    if (candidate.length <= max) {
      return candidate;
    }
    // Still too long: remember the shortest overshoot as the fallback.
    if (candidate.length < res.length) {
      res = candidate;
    }
  }

  return res;
}

/**
 * Fits `str` into the length range [min, max].
 *
 * The string is first extended via `ensureMinLength(str, min)` and then cut
 * down to `max` characters when a maximum is given. If both bounds conflict,
 * the maximum wins because truncation happens last.
 *
 * @param {string} str - Base sample string.
 * @param {number} min - Minimum length passed to `ensureMinLength`.
 * @param {number} [max] - Maximum length; `undefined`/`null` means "no maximum".
 * @returns {string} The adjusted string.
 */
function truncateString(str, min, max) {
  const padded = ensureMinLength(str, min);
  // Use a nullish check rather than truthiness so that a maximum of 0 is
  // honoured (yielding '') instead of being treated as "no maximum".
  if (max != null && padded.length > max) {
    return padded.substring(0, max);
  }
  return padded;
}

function emailSample() {
return '[email protected]';
Expand Down Expand Up @@ -42,12 +104,10 @@ function timeSample(min, max) {
return commonDateTimeSample({ min, max, omitTime: false, omitDate: true }).slice(1);
}

function defaultSample(min, max) {
let res = ensureMinLength('string', min);
if (max && res.length > max) {
res = res.substring(0, max);
}
return res;
/**
 * Produces a sample for a plain string schema.
 *
 * When a `pattern` is supplied the sample is generated from it by
 * `sampleRegex`; otherwise the literal 'string' is fitted to the length
 * bounds by `truncateString`.
 *
 * @param {number} min - Minimum length.
 * @param {number} [max] - Maximum length.
 * @param {string} [_propertyName] - Unused; kept so all samplers share one signature.
 * @param {string} [pattern] - Optional regular expression the sample should match.
 * @returns {string} The sample string.
 */
function defaultSample(min, max, _propertyName, pattern) {
  if (pattern) {
    return sampleRegex(pattern, min, max);
  }
  return truncateString('string', min, max);
}

function ipv4Sample() {
Expand Down Expand Up @@ -96,8 +156,10 @@ function relativeJsonPointerSample() {
return '1/relative/json/pointer';
}

function regexSample() {
return '/regex/';
/**
 * Produces a sample for a regex-flavoured string
 * (presumably registered for the `regex` format — confirm in `stringFormats`).
 *
 * Without a `pattern` the placeholder '/regex/' is fitted to the length
 * bounds by `truncateString`; with one, the sample comes from `sampleRegex`.
 *
 * @param {number} min - Minimum length.
 * @param {number} [max] - Maximum length.
 * @param {string} [_propertyName] - Unused; kept so all samplers share one signature.
 * @param {string} [pattern] - Optional regular expression the sample should match.
 * @returns {string} The sample string.
 */
function regexSample(min, max, _propertyName, pattern) {
  if (!pattern) {
    return truncateString('/regex/', min, max);
  }
  return sampleRegex(pattern, min, max);
}

const stringFormats = {
Expand Down Expand Up @@ -127,5 +189,10 @@ export function sampleString(schema, options, spec, context) {
let format = schema.format || 'default';
let sampler = stringFormats[format] || defaultSample;
let propertyName = context && context.propertyName;
return sampler(schema.minLength | 0, schema.maxLength, propertyName);
return sampler(
schema.minLength || 0,
schema.maxLength,
propertyName,
schema.pattern,
);
}
16 changes: 16 additions & 0 deletions test/unit/string.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,22 @@ describe('sampleString', () => {
expect(res).to.equal('fb4274c7-4fcd-4035-8958-a680548957ff');
});

it('should return valid string for regex with min and max', () => {
  // Ten `\d+` groups between a fixed 'foo-' prefix and '-bar' suffix; the
  // prefix and suffix together account for 8 characters of every sample.
  const regex = 'foo-\\d+\\d+\\d+\\d+\\d+\\d+\\d+\\d+\\d+\\d+-bar'; // 10 reps

  // Repeat with random exact lengths (minLength === maxLength) so the sampler
  // has to hit one specific length each time.
  for (let attempt = 0; attempt < 100; attempt += 1) {
    const targetLength = Math.floor(20 + Math.random() * 500);
    const digitCount = targetLength - 8;
    const schema = {
      format: 'regex',
      pattern: regex,
      minLength: targetLength,
      maxLength: targetLength,
    };

    res = sampleString(schema, null, null, {propertyName: 'fooId'});

    expect(res).to.match(new RegExp(`foo-\\d{${digitCount}}-bar`));
  }
});

it.each([
'email',
// 'idn-email', // unsupported by ajv-formats
Expand Down

0 comments on commit 64230d4

Please sign in to comment.