diff --git a/README.md b/README.md
index 4b755ce..9a5a11c 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,14 @@
+# Added by idning: rdbtools/cli/redis_stat.py
+
+::
+
+    $ time ./redis-rdb-tools/rdbtools/cli/redis_stat.py -k 'user-' -k 'file-' dump.rdb.part
+    -             cnt  cnt_hash  cnt_string       mem  mem_hash  mem_string  z_cnt_no_expire        z_sum_expire
+    user       220178    220178           0  34886519  34886519           0                0      -81039445726.0
+    file        71356         0       71356  15250363         0    15250363                0      -26377119006.0
+    z_unknown    2314         0        2314    252226         0      252226             2314                   0
+    zz_all     302813    220178       82635  52746444  34886519    17859925             2314  -1.10728956618e+11
+
 # Parse Redis dump.rdb files, Analyze Memory, and Export Data to JSON #
 
 Rdbtools is a parser for Redis' dump.rdb files. The parser generates events similar to an xml sax parser, and is very efficient memory wise.
diff --git a/rdbtools/cli/rdb.py b/rdbtools/cli/rdb.py
index a05c58f..3228af3 100755
--- a/rdbtools/cli/rdb.py
+++ b/rdbtools/cli/rdb.py
@@ -1,9 +1,15 @@
 #!/usr/bin/env python
 import os
 import sys
+
+PWD = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(PWD, '../../'))
+
 from optparse import OptionParser
 from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, PrintAllKeys
+
+
 VALID_TYPES = ("hash", "set", "string", "list", "sortedset")
 
 def main():
     usage = """usage: %prog [options] /path/to/dump.rdb
diff --git a/rdbtools/cli/redis_profiler.py b/rdbtools/cli/redis_profiler.py
index c946816..a570880 100755
--- a/rdbtools/cli/redis_profiler.py
+++ b/rdbtools/cli/redis_profiler.py
@@ -1,6 +1,10 @@
 #!/usr/bin/env python
 import os
 import sys
+
+PWD = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(PWD, '../../'))
+
 from string import Template
 from optparse import OptionParser
 from rdbtools import RdbParser, MemoryCallback, PrintAllKeys, StatsAggregator
diff --git a/rdbtools/cli/redis_stat.py b/rdbtools/cli/redis_stat.py
new file mode 100755
index 0000000..2efe78b
--- /dev/null
+++ b/rdbtools/cli/redis_stat.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+import os
+import sys
+
+PWD = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(PWD, '../../'))
+
+import collections
+
+from optparse import OptionParser
+from rdbtools import RdbParser, MemoryCallback
+
+
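+# Stat2D is a small two-dimensional counter table: stat2d[row][col]
+# accumulates ints via nested defaultdicts, and to_text() renders one
+# line per row key with columns sorted by header name and right-aligned.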
+class Stat2D(object):
+    def __init__(self):
+        self.ddict = collections.defaultdict(lambda: collections.defaultdict(int))
+
+    def __getitem__(self, key):
+        return self.ddict[key]
+
+    def __setitem__(self, key, value):
+        self.ddict[key] = value
+
+    def __delitem__(self, key):
+        del self.ddict[key]
+
+    def __contains__(self, key):
+        return key in self.ddict
+
+    def __len__(self):
+        return len(self.ddict)
+
+    def __repr__(self):
+        return repr(self.ddict)
+
+    def __iter__(self):
+        return self.ddict.__iter__()
+
+    def __reversed__(self):
+        return self.ddict.__reversed__()
+
+    def keys(self):
+        return self.ddict.keys()
+
+    def rows(self):
+        if not len(self.ddict):
+            return []
+
+        # Union of the column names seen in any row, flattened and deduped.
+        headers = [d.keys() for d in self.ddict.values()]
+        headers = sorted(set([i for sublist in headers for i in sublist]))
+
+        rows = [['-'] + headers]
+        for k in self.ddict:
+            row = [str(self.ddict[k][h]) for h in headers]
+            rows.append([k] + row)
+        return sorted(rows)
+
+    def to_text(self, rows=None):
+        if not rows:
+            rows = self.rows()
+        if not rows:
+            return '-'
+
+        def padding(s, width):
+            return ' ' * (width - len(s)) + s
+
+        # Right-align every column to the width of its widest cell.
+        for j in range(len(rows[0])):
+            max_width = max([len(row[j]) for row in rows])
+            for i in range(len(rows)):
+                rows[i][j] = padding(rows[i][j], max_width)
+
+        return '\n'.join(['\t'.join(row) for row in rows])
+
+    def __str__(self):
+        return self.to_text()
+
+    def get_html(self):
+        pass
+
+
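+# MyStatsAggregator buckets each MemoryRecord by key prefix (the -k
+# groupings), counting keys and bytes per type, plus a zz_all total row.
+# Keys matching no prefix land in z_unknown; z_cnt_no_expire counts keys
+# without a TTL, and z_sum_expire sums the remaining records' TTLs
+# (negative when the expiry timestamps are already in the past).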
+class MyStatsAggregator(object):
+    def __init__(self, key_groupings=[]):
+        self.stat2d = Stat2D()
+        self.key_groupings = key_groupings
+
+    def next_record(self, record):
+        def get_ns(key):
+            for k in self.key_groupings:
+                if key.startswith(k):
+                    return k
+            return 'z_unknown'
+        ns = get_ns(str(record.key))
+
+        # Update both the record's own prefix row and the zz_all total row.
+        for k in [ns, 'zz_all']:
+            self.stat2d[k]['cnt_' + record.type] += 1
+            self.stat2d[k]['mem_' + record.type] += record.bytes
+            self.stat2d[k]['cnt'] += 1
+            self.stat2d[k]['mem'] += record.bytes
+
+            if record.ttl == -1:
+                self.stat2d[k]['z_cnt_no_expire'] += 1
+            else:
+                self.stat2d[k]['z_sum_expire'] += record.ttl
+
+    def to_text(self):
+        return self.stat2d.to_text()
+
+
+def main():
+    usage = """usage: %prog [options] /path/to/dump.rdb
+
+Example 1 : %prog -k "user-" -k "friends-" /var/redis/6379/dump.rdb
+Example 2 : %prog /var/redis/6379/dump.rdb"""
+
+    parser = OptionParser(usage=usage)
+    parser.add_option("-k", "--key", dest="keys", action="append",
+                      help="Keys that should be grouped together. Multiple prefixes can be provided")
+
+    (options, args) = parser.parse_args()
+
+    if len(args) == 0:
+        parser.error("Redis RDB file not specified")
+    dump_file = args[0]
+    if not options.keys:
+        options.keys = []
+
+    stats = MyStatsAggregator(options.keys)
+    callback = MemoryCallback(stats, 64)
+    rdb_parser = RdbParser(callback)
+    try:
+        rdb_parser.parse(dump_file)
+    except Exception, e:
+        print 'error:', e
+
+    print stats.to_text()
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/rdbtools/memprofiler.py b/rdbtools/memprofiler.py
index 3a80ba2..63f6828 100644
--- a/rdbtools/memprofiler.py
+++ b/rdbtools/memprofiler.py
@@ -1,6 +1,9 @@
 from collections import namedtuple
 import random
 import json
+import datetime
+import math
+
 from rdbtools.parser import RdbCallback
 from rdbtools.callbacks import encode_key
 
@@ -9,7 +12,7 @@
 ZSKIPLIST_P=0.25
 REDIS_SHARED_INTEGERS = 10000
 
-MemoryRecord = namedtuple('MemoryRecord', ['database', 'type', 'key', 'bytes', 'encoding', 'size', 'len_largest_element'])
+MemoryRecord = namedtuple('MemoryRecord', ['database', 'type', 'key', 'bytes', 'encoding', 'size', 'len_largest_element', 'ttl'])
 
 class StatsAggregator():
     def __init__(self, key_groupings = None):
@@ -27,6 +30,8 @@ def next_record(self, record):
         self.add_histogram(record.type + "_length", record.size)
         self.add_histogram(record.type + "_memory", (record.bytes/10) * 10)
 
+        self.add_histogram('TTL', record.ttl)
+
         if record.type == 'list':
             self.add_scatter('list_memory_by_length', record.bytes, record.size)
 
@@ -70,23 +75,25 @@ def get_json(self):
 
 class PrintAllKeys():
     def __init__(self, out):
         self._out = out
-        self._out.write("%s,%s,%s,%s,%s,%s,%s\n" % ("database", "type", "key",
-            "size_in_bytes", "encoding", "num_elements", "len_largest_element"))
+        self._out.write("%s,%s,%s,%s,%s,%s,%s,%s\n" % ("database", "type", "key",
+            "size_in_bytes", "encoding", "num_elements", "len_largest_element", "ttl"))
 
     def next_record(self, record):
-        self._out.write("%d,%s,%s,%d,%s,%d,%d\n" % (record.database, record.type, encode_key(record.key),
-            record.bytes, record.encoding, record.size, record.len_largest_element))
+        self._out.write("%d,%s,%s,%d,%s,%d,%d,%s\n" % (record.database, record.type, encode_key(record.key),
+            record.bytes, record.encoding, record.size, record.len_largest_element, record.ttl))
 
 class MemoryCallback(RdbCallback):
     '''Calculates the memory used if this rdb file were loaded into RAM
     The memory usage is approximate, and based on heuristics.
     '''
     def __init__(self, stream, architecture):
+        self._current_time = datetime.datetime.now()
         self._stream = stream
         self._dbnum = 0
         self._current_size = 0
         self._current_encoding = None
         self._current_length = 0
+        self._current_expire = None
         self._len_largest_element = 0
 
         if architecture == 64 or architecture == '64':
@@ -113,13 +120,14 @@ def set(self, key, value, expiry, info):
         size += self.key_expiry_overhead(expiry)
 
         length = element_length(value)
-        record = MemoryRecord(self._dbnum, "string", key, size, self._current_encoding, length, length)
+        record = MemoryRecord(self._dbnum, "string", key, size, self._current_encoding, length, length, self.parse_expiry(expiry))
         self._stream.next_record(record)
         self.end_key()
 
     def start_hash(self, key, length, expiry, info):
         self._current_encoding = info['encoding']
-        self._current_length = length
+        self._current_length = length
+        self._current_expire = expiry
         size = self.sizeof_string(key)
         size += 2*self.robj_overhead()
         size += self.top_level_object_overhead()
@@ -146,7 +154,7 @@ def hset(self, key, field, value):
         self._current_size += 2*self.robj_overhead()
 
     def end_hash(self, key):
-        record = MemoryRecord(self._dbnum, "hash", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
+        record = MemoryRecord(self._dbnum, "hash", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element, self.parse_expiry(self._current_expire))
         self._stream.next_record(record)
         self.end_key()
 
@@ -164,13 +172,14 @@ def sadd(self, key, member):
         self._current_size += self.robj_overhead()
 
     def end_set(self, key):
-        record = MemoryRecord(self._dbnum, "set", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
+        record = MemoryRecord(self._dbnum, "set", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element, self.parse_expiry(self._current_expire))
         self._stream.next_record(record)
         self.end_key()
 
     def start_list(self, key, length, expiry, info):
         self._current_length = length
         self._current_encoding = info['encoding']
+        self._current_expire = expiry
         size = self.sizeof_string(key)
         size += 2*self.robj_overhead()
         size += self.top_level_object_overhead()
@@ -194,13 +203,14 @@ def rpush(self, key, value):
         self._current_size += self.robj_overhead()
 
     def end_list(self, key):
-        record = MemoryRecord(self._dbnum, "list", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
+        record = MemoryRecord(self._dbnum, "list", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element, self.parse_expiry(self._current_expire))
         self._stream.next_record(record)
         self.end_key()
 
     def start_sorted_set(self, key, length, expiry, info):
         self._current_length = length
         self._current_encoding = info['encoding']
+        self._current_expire = expiry
         size = self.sizeof_string(key)
         size += 2*self.robj_overhead()
         size += self.top_level_object_overhead()
@@ -225,7 +235,7 @@ def zadd(self, key, score, member):
             self._current_size += self.skiplist_entry_overhead()
 
     def end_sorted_set(self, key):
-        record = MemoryRecord(self._dbnum, "sortedset", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
+        record = MemoryRecord(self._dbnum, "sortedset", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element, self.parse_expiry(self._current_expire))
         self._stream.next_record(record)
         self.end_key()
 
@@ -233,6 +243,7 @@ def end_key(self):
         self._current_encoding = None
         self._current_size = 0
         self._len_largest_element = 0
+        self._current_expire = None
 
     def sizeof_string(self, string):
         # See struct sdshdr over here https://github.com/antirez/redis/blob/unstable/src/sds.h
@@ -327,7 +338,11 @@ def zset_random_level(self):
             return level
         else:
             return ZSKIPLIST_MAXLEVEL
-    
+    def parse_expiry(self, expiry):
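+        # Translate the absolute expiry timestamp stored in the rdb file
+        # into a TTL relative to when this parse started: -1 means the key
+        # never expires; a negative result means the expiry already passed.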
+        if not expiry:
+            return -1
+        else:
+            return math.ceil((expiry - self._current_time).total_seconds())
 
 def element_length(element):
     if isinstance(element, int):
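
Note on the new ttl column: parse_expiry() converts the absolute expiry
timestamp stored in the rdb file into seconds relative to parse time, so
keys whose expiry has already passed yield negative TTLs; that is why the
z_sum_expire values in the README sample above are negative. A minimal
standalone sketch of the same computation (not part of the patch; the
timestamps are made-up examples)::

    import datetime
    import math

    def parse_expiry(expiry, now):
        # -1 for keys with no expiry, else whole seconds from `now`
        # until the expiry timestamp (negative if already expired).
        if not expiry:
            return -1
        return math.ceil((expiry - now).total_seconds())

    now = datetime.datetime(2013, 1, 1)
    print parse_expiry(None, now)                               # -1
    print parse_expiry(now + datetime.timedelta(hours=1), now)  # 3600.0
    print parse_expiry(now - datetime.timedelta(days=30), now)  # -2592000.0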