Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Several changes to make sls terminate more often with length/extract operations #7498

Merged
merged 9 commits into from
Dec 30, 2024
105 changes: 80 additions & 25 deletions src/ast/sls/sls_seq_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,16 @@ namespace sls {
}
if (ctx.rand(2) == 0 && update(e, rational(sx.length())))
return false;
// TODO: Why from the beginning? We can take any subsequence of given length
if (r < sx.length() && update(x, sx.extract(0, r.get_unsigned())))
return false;
if (update(e, rational(sx.length())))
return false;
if (r > sx.length() && update(x, sx + zstring(m_chars[ctx.rand(m_chars.size())])))
if (r > sx.length() && update(x, sx + (!m_chars.empty() ? m_chars[ctx.rand(m_chars.size())] : zstring("a"))))
return false;
verbose_stream() << mk_pp(x, m) << " = " << sx << " " << ve << "\n";
NOT_IMPLEMENTED_YET();
// This case seems to imply unsat
verbose_stream() << "The input might be unsat\n"; // example to trigger: (assert (and (>= (str.len X) 2) (= (str.substr X 0 1) "")))
VERIFY(false);
return false;
}

Expand Down Expand Up @@ -516,6 +518,8 @@ namespace sls {
return true;
if (seq.is_string(e->get_sort()) && strval0(e) == strval1(e))
return true;
SASSERT(!seq.is_string(e->get_sort()) || strval0(e).length() >= get_eval(e).min_length);
SASSERT(!seq.is_string(e->get_sort()) || strval0(e).length() <= get_eval(e).max_length);
if (e->get_family_id() == m_fid)
return repair_down_seq(e);
if (m.is_eq(e))
Expand All @@ -539,15 +543,17 @@ namespace sls {
len_u = r.get_unsigned();
if (len_u == val_x.length())
return true;
if (len_u < val_x.length()) {
for (unsigned i = 0; i + len_u < val_x.length(); ++i)
if (len_u < val_x.length()) {
for (unsigned i = 0; i + len_u < val_x.length(); ++i)
m_str_updates.push_back({ x, val_x.extract(i, len_u), 1 });
}
if (!m_chars.empty()) {
zstring ch(m_chars[ctx.rand(m_chars.size())]);
m_str_updates.push_back({ x, val_x + ch, 1 });
m_str_updates.push_back({ x, ch + val_x, 1 });
}
zstring ch = !m_chars.empty() ? m_chars[ctx.rand(m_chars.size())] : zstring("a");
zstring val_x_new = val_x + ch;
m_str_updates.push_back({ x, val_x_new, 1 });
zstring val_x_new2 = ch + val_x;
if (val_x_new != val_x_new2)
m_str_updates.push_back({ x, val_x_new2, 1 });

return apply_update();
}

Expand Down Expand Up @@ -873,6 +879,22 @@ namespace sls {
return u[n][m];
}

int seq_plugin::add_str_update(expr* e, zstring const& val, double score) {
zstring new_v = trunc_pad_to_fit(get_eval(e).min_length, get_eval(e).max_length, val);
m_str_updates.push_back({ e, new_v, score });
return new_v.length() == val.length() ? 0 : (new_v.length() > val.length() ? 1 : -1);
}

zstring seq_plugin::trunc_pad_to_fit(unsigned min_length, unsigned max_length, zstring const& s) {
if (s.length() > max_length)
return s.extract(0, max_length);
if (s.length() >= min_length)
return s;
zstring r = s;
while (r.length() < min_length)
r += !m_chars.empty() ? m_chars[ctx.rand(m_chars.size())] : zstring("a");
return r;
}

bool seq_plugin::repair_down_str_eq_edit_distance_incremental(app* eq) {
auto const& L = lhs(eq);
Expand Down Expand Up @@ -1263,7 +1285,6 @@ namespace sls {
expr* x, * y;
VERIFY(seq.str.is_at(e, x, y));
zstring se = strval0(e);
verbose_stream() << "repair down at " << mk_pp(e, m) << " " << se << "\n";
if (se.length() > 1)
return false;
zstring sx = strval0(x);
Expand All @@ -1290,11 +1311,18 @@ namespace sls {
// index should be in bounds of a.
if (!is_value(x)) {
if (lenx > r && r >= 0) {
zstring new_x = sx.extract(0, r.get_unsigned()) + se + sx.extract(r.get_unsigned() + 1, lenx);
// insert or replace the desired character
zstring p = sx.extract(0, r.get_unsigned()) + se;
zstring new_x = p + sx.extract(r.get_unsigned() + 1, lenx - (r.get_unsigned() + 1));
m_str_updates.push_back({ x, new_x, 1 });
new_x = p + sx.extract(r.get_unsigned(), lenx - r.get_unsigned());
m_str_updates.push_back({ x, new_x, 1 });
}
if (lenx <= r) {
zstring new_x = sx + se;
else {
zstring new_x = sx;
while (new_x.length() < r)
new_x += zstring(!m_chars.empty() ? m_chars[ctx.rand(m_chars.size())] : zstring("a"));
new_x = trunc_pad_to_fit(get_eval(x).min_length, get_eval(x).max_length, new_x + se);
m_str_updates.push_back({ x, new_x, 1 });
}
}
Expand Down Expand Up @@ -1492,25 +1520,46 @@ namespace sls {
bool seq_plugin::repair_down_str_extract(app* e) {
expr* x, * offset, * len;
VERIFY(seq.str.is_extract(e, x, offset, len));
SASSERT(strval0(e) != strval1(e));
zstring v = strval0(e);
zstring r = strval0(x);
expr_ref offset_e = ctx.get_value(offset);
expr_ref len_e = ctx.get_value(len);
rational offset_val, len_val;
VERIFY(a.is_numeral(offset_e, offset_val));
VERIFY(a.is_numeral(len_e, len_val));
if (offset_val < 0)
return false;
if (len_val < 0)
return false;
SASSERT(offset_val.is_unsigned());
SASSERT(len_val.is_unsigned());
unsigned offset_u = offset_val.get_unsigned();
unsigned len_u = len_val.get_unsigned();

// std::cout << "repair extract " << mk_bounded_pp(e, m) << " := \"" << v << "\"" << std::endl;
// std::cout << "Args: \"" << r << "\" " << offset_val << " " << len_val << std::endl;

unsigned offset_u = offset_val.is_unsigned() ? offset_val.get_unsigned() : 0;
unsigned len_u = len_val.is_unsigned() ? len_val.get_unsigned() : 0;
bool has_empty = false;

if (offset_val.is_neg() || offset_val.get_unsigned() >= r.length()) {
has_empty = true;
for (unsigned i = 0; i < r.length(); i++)
m_int_updates.push_back({ offset, rational(i), 1 });
}

if (!len_val.is_pos()) {
has_empty = true;
for (unsigned i = 1; i + offset_u < r.length(); i++)
m_int_updates.push_back({ len, rational(i), 1 });
}

if (has_empty)
m_str_updates.push_back({ e, zstring(), 1 });

zstring prefix = r.extract(0, offset_u);
zstring suffix = r.extract(offset_u + len_u, r.length());
zstring new_r = prefix + v + suffix;
m_str_updates.push_back({ x, new_r, 1 });

new_r = trunc_pad_to_fit(get_eval(x).min_length, get_eval(x).max_length, new_r);

if (new_r != r)
m_str_updates.push_back({ x, new_r, 1 });

return apply_update();
}

Expand All @@ -1522,7 +1571,7 @@ namespace sls {
for (auto const& e : es)
value += strval0(e);
if (value == value0)
return true;
return true;
uint_set chars;

for (auto ch : value0)
Expand Down Expand Up @@ -1599,6 +1648,7 @@ namespace sls {


bool seq_plugin::apply_update() {
SASSERT(!m_str_updates.empty() || !m_int_updates.empty());
double sum_scores = 0;
for (auto const& [e, val, score] : m_str_updates)
sum_scores += score;
Expand Down Expand Up @@ -1633,7 +1683,9 @@ namespace sls {
if (is_str_update) {
auto [e, value, score] = m_str_updates[i];

// std::cout << "Trying str-update: " << mk_pp(e, m) << " := \"" << value << "\"" << std::endl;
if (update(e, value)) {
// std::cout << "Success" << std::endl;
IF_VERBOSE(3, verbose_stream() << "set value " << mk_bounded_pp(e, m) << " := \"" << value << "\"\n");
m_str_updates.reset();
m_int_updates.reset();
Expand All @@ -1649,7 +1701,9 @@ namespace sls {

IF_VERBOSE(3, verbose_stream() << "set value " << mk_bounded_pp(e, m) << " := " << value << "\n");

// std::cout << "Trying int-update: " << mk_pp(e, m) << " := " << value << std::endl;
if (update(e, value)) {
// std::cout << "Success" << std::endl;
m_int_updates.reset();
m_str_updates.reset();
return true;
Expand All @@ -1660,11 +1714,13 @@ namespace sls {
}
}

// std::cout << "No candidate found" << std::endl;
return false;
}

bool seq_plugin::update(expr* e, zstring const& value) {
SASSERT(value != strval0(e));
// std::cout << "str-update " << mk_pp(e, m) << " := \"" << value << "\" [\"" << strval0(e) << "\"]" << std::endl;
// if (value == strval0(e))
// return true;
if (is_value(e))
Expand Down Expand Up @@ -1750,7 +1806,6 @@ namespace sls {
if (!is_seq_predicate(e))
return;
auto a = to_app(e);
// verbose_stream() << "repair " << lit << " " << mk_pp(e, m) << " " << bval1(e) << "\n";
if (bval1(e) == lit.sign())
ctx.flip(lit.var());
}
Expand Down
3 changes: 3 additions & 0 deletions src/ast/sls/sls_seq_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ namespace sls {
void add_char_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);
void add_substr_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);

int add_str_update(expr* e, zstring const& val, double score);
zstring trunc_pad_to_fit(unsigned min_length, unsigned max_length, zstring const& s);

// regex functionality

// enumerate set of strings that can match a prefix of regex r.
Expand Down
Loading