Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new string repair heuristic #7496

Merged
merged 7 commits into from
Dec 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 52 additions & 11 deletions src/ast/sls/sls_seq_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,17 @@ Equality solving using stochastic Nelson.
#include "ast/sls/sls_seq_plugin.h"
#include "ast/sls/sls_context.h"
#include "ast/ast_pp.h"
#include "params/sls_params.hpp"


namespace sls {

struct zstring_hash_proc {
unsigned operator()(zstring const & s) const {
auto str = s.encode();
return string_hash(str.c_str(), static_cast<unsigned>(s.length()), 17);
}
};

seq_plugin::seq_plugin(context& c):
plugin(c),
Expand All @@ -107,6 +115,8 @@ namespace sls {
thrw(c.get_manager())
{
m_fid = seq.get_family_id();
sls_params p(c.get_params());
m_str_update_strategy = p.str_update_strategy() == 0 ? EDIT_CHAR : EDIT_SUBSTR;
}

void seq_plugin::propagate_literal(sat::literal lit) {
Expand Down Expand Up @@ -531,12 +541,12 @@ namespace sls {
return true;
if (len_u < val_x.length()) {
for (unsigned i = 0; i + len_u < val_x.length(); ++i)
m_str_updates.push_back({ x, val_x.extract(i, len_u), 1 });
m_str_updates.push_back({ x, val_x.extract(i, len_u), 1 });
}
if (!m_chars.empty()) {
zstring ch(m_chars[ctx.rand(m_chars.size())]);
m_str_updates.push_back({ x, val_x + ch, 1 });
m_str_updates.push_back({ x, ch + val_x, 1 });
m_str_updates.push_back({ x, ch + val_x, 1 });
}
return apply_update();
}
Expand Down Expand Up @@ -619,7 +629,7 @@ namespace sls {
if (!is_value(x))
m_str_updates.push_back({ x, strval1(y), 1 });
if (!is_value(y))
m_str_updates.push_back({ y, strval1(x), 1 });
m_str_updates.push_back({ y, strval1(x), 1 });
}
else {
// disequality
Expand Down Expand Up @@ -665,8 +675,39 @@ namespace sls {
return d[n][m];
}


void seq_plugin::add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars) {
if (m_str_update_strategy == EDIT_CHAR)
add_char_edit_updates(w, val, val_other, chars);
else
add_substr_edit_updates(w, val, val_other, chars);
}

void seq_plugin::add_substr_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars) {
// all consecutive subsequences of val_other
hashtable<zstring, zstring_hash_proc, default_eq<zstring>> set;
set.insert(zstring(""));
for (unsigned i = 0; i < val_other.length(); ++i) {
for (unsigned j = val_other.length(); j > 0; ++j) {
zstring sub = val_other.extract(i, j);
if (set.contains(sub))
break;
set.insert(sub);
}
}

for (auto x : w) {
if (is_value(x))
continue;
zstring const& a = strval0(x);
for (auto& seq : set) {
if (seq == a)
continue;
m_str_updates.push_back({ x, seq, 1 });
}
}
}

void seq_plugin::add_char_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars) {
for (auto x : w) {
if (is_value(x))
continue;
Expand Down Expand Up @@ -1057,7 +1098,7 @@ namespace sls {
}
if (!is_value(xi)) {
m_str_updates.push_back({ xi, vi.extract(0, i), score });
m_str_updates.push_back({ xi, vi.extract(0, i) + zstring(vj[j]), score});
m_str_updates.push_back({ xi, vi.extract(0, i) + zstring(vj[j]), score});
}
if (!is_value(yj)) {
m_str_updates.push_back({ yj, vj.extract(0, j), score });
Expand Down Expand Up @@ -1236,7 +1277,7 @@ namespace sls {
if (!is_value(x)) {
m_str_updates.push_back({ x, zstring(), 1 });
if (lenx > r && r >= 0)
m_str_updates.push_back({ x, sx.extract(0, r.get_unsigned()), 1 });
m_str_updates.push_back({ x, sx.extract(0, r.get_unsigned()), 1 });
}
if (!m.is_value(y)) {
m_int_updates.push_back({ y, rational(lenx), 1 });
Expand Down Expand Up @@ -1287,7 +1328,7 @@ namespace sls {
if (value == -1) {
m_str_updates.push_back({ y, zstring(m_chars[ctx.rand(m_chars.size())]), 1 });
if (lenx > 0)
m_str_updates.push_back({ x, zstring(), 1 });
m_str_updates.push_back({ x, zstring(), 1 });
}
// change x:
// insert y into x at offset
Expand All @@ -1305,7 +1346,7 @@ namespace sls {
if (offset_r.is_unsigned() && 0 <= value && offset_u + value < lenx) {
unsigned offs = offset_u + value.get_unsigned();
for (unsigned i = offs; i < lenx; ++i)
m_str_updates.push_back({ y, sx.extract(offs, i - offs + 1), 1 });
m_str_updates.push_back({ y, sx.extract(offs, i - offs + 1), 1 });
}

// change offset:
Expand Down Expand Up @@ -1436,13 +1477,13 @@ namespace sls {
if (idx > 0)
su = sa.extract(0, idx);
su = su + sa.extract(idx + sb.length(), sa.length() - idx - sb.length());
m_str_updates.push_back({a, su, 1});
m_str_updates.push_back({ a, su, 1});
}
if (!m_chars.empty() && !is_value(b)) {
zstring sb1 = sb + zstring(m_chars[ctx.rand(m_chars.size())]);
zstring sb2 = zstring(m_chars[ctx.rand(m_chars.size())]) + sb;
m_str_updates.push_back({b, sb1, 1});
m_str_updates.push_back({b, sb2, 1});
m_str_updates.push_back({ b, sb1, 1});
m_str_updates.push_back({ b, sb2, 1});
}
}
return apply_update();
Expand Down
10 changes: 9 additions & 1 deletion src/ast/sls/sls_seq_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,19 @@ namespace sls {
ptr_vector<expr> lhs, rhs;
};

enum edit_distance_strategy {
EDIT_CHAR,
EDIT_SUBSTR,
};

seq_util seq;
arith_util a;
seq_rewriter rw;
th_rewriter thrw;
scoped_ptr_vector<eval> m_values;
indexed_uint_set m_chars; // set of characters in the problem
bool m_initialized = false;
bool m_initialized = false;
edit_distance_strategy m_str_update_strategy;

struct str_update {
expr* e;
Expand Down Expand Up @@ -122,6 +128,8 @@ namespace sls {
unsigned edit_distance_with_updates(string_instance const& a, string_instance const& b);
unsigned edit_distance(zstring const& a, zstring const& b);
void add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);
void add_char_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);
void add_substr_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);

// regex functionality

Expand Down
5 changes: 3 additions & 2 deletions src/params/sls_params.pyg
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ def_module_params('sls',
description='Experimental Stochastic Local Search Solver (for QFBV only).',
params=(max_memory_param(),
('max_restarts', UINT, UINT_MAX, 'maximum number of restarts'),
('max_repairs', UINT, 1000, 'maximum number of repairs before restart'),
('max_repairs', UINT, 1000, 'maximum number of repairs before restart'),
('walksat', BOOL, 1, 'use walksat assertion selection (instead of gsat)'),
('walksat_ucb', BOOL, 1, 'use bandit heuristic for walksat assertion selection (instead of random)'),
('walksat_ucb_constant', DOUBLE, 20.0, 'the ucb constant c in the term score + c * f(touched)'),
Expand All @@ -24,5 +24,6 @@ def_module_params('sls',
('rescore', BOOL, 1, 'rescore/normalize top-level score every base restart interval'),
('dt_axiomatic', BOOL, True, 'use axiomatic mode or model reduction for datatype solver'),
('track_unsat', BOOL, 0, 'keep a list of unsat assertions as done in SAT - currently disabled internally'),
('random_seed', UINT, 0, 'random seed')
('random_seed', UINT, 0, 'random seed'),
('str_update_strategy', UINT, 1, 'string update candidate selection: 0 - single character based update, 1 - subsequence based update')
))
Loading