-
Notifications
You must be signed in to change notification settings - Fork 133
/
Copy pathPatternDeobfuscateUtil.cpp
366 lines (315 loc) · 11.5 KB
/
PatternDeobfuscateUtil.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
#include <hexrays.hpp>
#include "HexRaysUtil.hpp"
#include "PatternDeobfuscateUtil.hpp"
#include "Config.hpp"
// For microinstructions with two or more operands (in l and r), check to see
// if one of them is numeric and the other one isn't. If this is the case,
// return pointers to the operands in the appropriately-named argument
// variables and return true. Otherwise, return false.
// This is a utility function that helps implement many other pattern-matching
// deobfuscations.
bool ExtractNumAndNonNum(minsn_t *insn, mop_t *&numOp, mop_t *&otherOp)
{
mop_t *num = NULL, *other = NULL;
if (insn->l.t == mop_n)
{
num = &insn->l;
other = &insn->r;
}
if (insn->r.t == mop_n)
{
if (num != NULL)
{
// Technically we have an option to perform constant folding
// here... but Hex-Rays should have done / should do that for us
return false;
}
num = &insn->r;
other = &insn->l;
}
if (num == NULL)
return false;
numOp = num;
otherOp = other;
return true;
}
// For microinstructions with two or more operands (in l and r), check to see
// if one of them is a mop_d (result of another microinstruction), where the
// provider microinstruction is has opcode type mc. If successful, return the
// provider microinstruction and the non-matching micro-operand in the
// appropriately-named arguments. Otherwise, return false.
// This helper function is useful for performing pattern-matching upon
// commutative operations. Without it, we'd have to write each of our patterns
// twice: once for when the operation we were looking for was on the left-hand
// side, and once for when the operation was on the right-hand side.
bool ExtractByOpcodeType(minsn_t *ins, mcode_t mc, minsn_t *&match, mop_t*& noMatch)
{
mop_t *possNoMatch = NULL;
minsn_t *possMatch = NULL;
// Does the left-hand side contain the operation we're looking for?
// Update possNoMatch or possMatch, depending.
if (!ins->l.is_insn() || ins->l.d->opcode != mc)
possNoMatch = &ins->l;
else
possMatch = ins->l.d;
// Perform the same check on the right-hand side.
if (!ins->r.is_insn() || ins->r.d->opcode != mc)
possNoMatch = &ins->r;
else
possMatch = ins->r.d;
// If both sides matched, or neither side matched, fail.
if (possNoMatch == NULL || possMatch == NULL)
return false;
match = possMatch;
noMatch = possNoMatch;
return true;
}
// The obfuscation techniques upon conditional operations have "&1"
// miscellaneously present or not present within them. Writing pattern-matching
// rules for all of the many possibilities would be extremely tedious. This
// helper function reduces the tedium by checking to see whether the provided
// microinstruction is "x & 1" (or "1 & x"), and it extracts x (as both an
// operand, and, if the operand is a mop_d (result of another
// microinstruction), return the provider instruction also.
bool TunnelThroughAnd1(minsn_t *ins, minsn_t *&inner, bool bRequireSize1, mop_t **opInner)
{
// Microinstruction must be AND
if (ins->opcode != m_and)
return false;
// One side must be numeric, the other one non-numeric
mop_t *andNum, *andNonNum;
if (!ExtractNumAndNonNum(ins, andNum, andNonNum))
return false;
// The number must be the value 1
if (andNum->nnn->value != 1)
return false;
if(bRequireSize1 && andNum->size != 1)
return false;
// If requested, pass the operand back to the caller this point
if(opInner != NULL)
*opInner = andNonNum;
// If the non-numeric operand is an instruction, extract the
// microinstruction and pass that back to the caller.
if (andNonNum->is_insn())
{
inner = andNonNum->d;
return true;
}
// Otherwise, if the non-numeric part wasn't a mop_d, check to see whether
// the caller specifically wanted a mop_d. If they did, fail. If the caller
// was willing to accept another operand type, return true.
return opInner != NULL;
}
// The obfuscator implements boolean inversion via "x ^ 1". Hex-Rays, or one of
// our other deobfuscation rules, could also convert these to m_lnot
// instructions. This function checks to see if the microinstruction passed as
// argument matches one of those patterns, and if so, extracts the negated
// term as both a micro-operand and a microinstruction (if the negated operand
// was of mop_d type).
bool ExtractLogicallyNegatedTerm(minsn_t *ins, minsn_t *&insNegated, mop_t **opNegated)
{
mop_t *nonNegated;
// Check the m_lnot case.
if (ins->opcode == m_lnot)
{
// Extract the operand, if requested by the caller.
if(opNegated != NULL)
*opNegated = &ins->l;
// If the operand was mop_d (i.e., result of another microinstruction),
// retrieve the provider microinstruction. Get rid of the pesky "&1"
// terms while we're at it.
if (ins->l.is_insn())
{
insNegated = ins->l.d;
while(TunnelThroughAnd1(insNegated, insNegated));
return true;
}
// Otherwise, if the operand was not of type mop_d, "success" depends
// on whether the caller was willing to accept a non-mop_d operand.
else
{
insNegated = NULL;
return opNegated != NULL;
}
}
// If the operand wasn't m_lnot, check the m_xor case.
if (ins->opcode != m_xor)
return false;
// We're looking for XORs with one constant and one non-constant operand
mop_t *xorNum, *xorNonNum;
if (!ExtractNumAndNonNum(ins, xorNum, xorNonNum))
return false;
// The constant must be the 1-byte value 1
if (xorNum->nnn->value != 1 || xorNum->size != 1)
return false;
// The non-numeric part must also be 1. This check is probably unnecessary.
if (xorNonNum->size != 1)
return false;
// If the caller wanted an operand, give it to them.
if (opNegated != NULL)
*opNegated = xorNonNum;
// If the operand was mop_d (result of another microinstruction), extract
// it and remove the &1 terms.
if (xorNonNum->is_insn())
{
insNegated = xorNonNum->d;
while (TunnelThroughAnd1(insNegated, insNegated));
return true;
}
// Otherwise, if the operand was not of type mop_d, "success" depends on
// whether the caller was willing to accept a non-mop_d operand.
insNegated = NULL;
return opNegated != NULL;
}
// This function checks whether two conditional terms are logically opposite.
// For example, "eax <s 1" and "eax >=s 1" would be considered logically
// opposite. The check is purely syntactic; semantically-equivalent conditions
// that were not implemented as syntactic logical opposites will not be
// considered the same by this function.
bool AreConditionsOpposite(minsn_t *lhsCond, minsn_t *rhsCond)
{
// Get rid of pesky &1 terms
while (TunnelThroughAnd1(lhsCond, lhsCond));
while (TunnelThroughAnd1(rhsCond, rhsCond));
// If the conditions were negated via m_lnot or m_xor by 1, get the
// un-negated part as a microinstruction.
bool bLhsWasNegated = ExtractLogicallyNegatedTerm(lhsCond, lhsCond);
bool bRhsWasNegated = ExtractLogicallyNegatedTerm(rhsCond, rhsCond);
// lhsCond and rhsCond will be set to NULL if their original terms were
// negated, but the thing that was negated wasn't the result of another
// microinstruction.
if (lhsCond == NULL || rhsCond == NULL)
return false;
// If one was negated and the other wasn't, compare them for equality.
// If the non-negated part of the negated comparison was identical to
// the non-negated comparison, then the conditions are clearly opposite.
// I guess this could also be extended by incorporating the logic from
// below, but I didn't need to do that in practice.
if (bLhsWasNegated != bRhsWasNegated)
return lhsCond->equal_insns(*rhsCond, EQ_IGNSIZE | EQ_IGNCODE);
// Otherwise, if both were negated or both were non-negated, compare the
// conditionals term-wise. First, ensure that both microoperands are
// setXX instructions.
else if (is_mcode_set(lhsCond->opcode) && is_mcode_set(rhsCond->opcode))
{
// Now we have two possibilities.
// #1: Condition codes are opposite, LHS and RHS are both equal
if (negate_mcode_relation(lhsCond->opcode) == rhsCond->opcode)
return
equal_mops_ignore_size(lhsCond->l, rhsCond->l) &&
equal_mops_ignore_size(lhsCond->r, rhsCond->r);
// #2: Condition codes are the same, LHS and RHS are swapped
if (lhsCond->opcode == rhsCond->opcode)
return
equal_mops_ignore_size(lhsCond->l, rhsCond->r) &&
equal_mops_ignore_size(lhsCond->r, rhsCond->l);
}
// No dice.
return false;
}
// Insert a micro-operand into one of the two sets above. Remove
// duplicates -- meaning, if the operand we're trying to insert is already
// in the set, remove the existing one instead. This is the "cancellation"
// in practice.
bool XorSimplifier::Insert(std::set<mop_t *> &whichSet, mop_t *op)
{
mop_t &rop = *op;
// Because mop_t types currently cannot be compared or hashed in the
// current microcode API, I had to use a slow linear search procedure
// to compare the micro-operand we're trying to insert against all
// previously-inserted values in the relevant set.
for (auto otherOp : whichSet)
{
// If the micro-operand was already in the set, get rid of it.
if (equal_mops_ignore_size(rop, *otherOp))
{
whichSet.erase(otherOp);
// Mark these operands as being able to be deleted.
m_ZeroOut.push_back(op);
m_ZeroOut.push_back(otherOp);
// Couldn't insert.
return false;
}
}
// Otherwise, if it didn't match an operand already in the set, insert
// it into the set and return true on successful insertion.
whichSet.insert(op);
return true;
}
// Wrapper to insert constant and non-constant terms
bool XorSimplifier::InsertNonConst(mop_t *op)
{
++m_InsertedNonConst;
return Insert(m_NonConst, op);
}
bool XorSimplifier::InsertConst(mop_t *op)
{
++m_InsertedConst;
return Insert(m_Const, op);
}
// Insert one micro-operand. If the operand is the result of another XOR
// microinstruction, recursively insert the operands being XORed.
// Otherwise, insert the micro-operand into the proper set (constant or
// non-constant) depending upon its operand type.
void XorSimplifier::Insert(mop_t *op)
{
// If operand is m_xor microinstruction, recursively insert children
if (op->t == mop_d && op->d->opcode == m_xor)
{
Insert(&op->d->l);
Insert(&op->d->r);
return;
}
// Otherwise, insert it into the constant or non-constant set
if (op->t == mop_n)
InsertConst(op);
else
InsertNonConst(op);
}
// This function takes an XOR microinstruction and inserts its operands
// by calling the function above
void XorSimplifier::Insert(minsn_t *insn)
{
if (insn->opcode != m_xor)
{
#if OPTVERBOSE
char buf[1000];
mcode_t_to_string(insn, buf, sizeof(buf));
msg("[I] Tried to insert from non-XOR instruction of type %s at %a\n", buf, insn->ea);
#endif
return;
}
// Insert children
Insert(&insn->l);
Insert(&insn->r);
}
// Were any cancellations performed?
bool XorSimplifier::DidSimplify()
{
return !m_ZeroOut.empty();
//return m_Const.size() != m_InsertedConst || m_NonConst.size() != m_InsertedNonConst;
}
// Top-level functionality to simplify an XOR microinstruction. Insert the
// instruction, then see if any simplifications could be performed. If so,
// remove the simplified terms.
bool XorSimplifier::Simplify(minsn_t *insn)
{
// Only insert XOR instructions
if (insn->opcode != m_xor)
return false;
Insert(insn);
// Were there common terms that could be cancelled?
if (!DidSimplify())
return false;
// Perform the cancellations by zeroing out the common micro-operands
for (auto zo : m_ZeroOut)
zo->make_number(0, zo->size);
// Trigger Hex-Rays' ordinary optimizations, which will remove the
// XOR 0 terms. Return true.
#if IDA_SDK_VERSION == 710
insn->optimize_flat();
#elif IDA_SDK_VERSION >= 720
insn->optimize_solo();
#endif
return true;
}