Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add redis_stat.py #43

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
# Added by idning: rdbtools/cli/redis_stat.py

::

$ time ./redis-rdb-tools/rdbtools/cli/redis_stat.py -k 'user-' -k 'file-' dump.rdb.part
- cnt cnt_hash cnt_string mem mem_hash mem_string z_cnt_no_expire z_sum_expire
user 220178 220178 0 34886519 34886519 0 0 -81039445726.0
file 71356 0 71356 15250363 0 15250363 0 -26377119006.0
z_unknown 2314 0 2314 252226 0 252226 2314 0
zz_all 302813 220178 82635 52746444 34886519 17859925 2314 -1.10728956618e+11

# Parse Redis dump.rdb files, Analyze Memory, and Export Data to JSON #

Rdbtools is a parser for Redis' dump.rdb files. The parser generates events similar to an xml sax parser, and is very efficient memory wise.
Expand Down
6 changes: 6 additions & 0 deletions rdbtools/cli/rdb.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
#!/usr/bin/env python
import os
import sys

PWD = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(PWD, '../../'))

from optparse import OptionParser
from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, PrintAllKeys



VALID_TYPES = ("hash", "set", "string", "list", "sortedset")
def main():
usage = """usage: %prog [options] /path/to/dump.rdb
Expand Down
4 changes: 4 additions & 0 deletions rdbtools/cli/redis_profiler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#!/usr/bin/env python
import os
import sys

PWD = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(PWD, '../../'))

from string import Template
from optparse import OptionParser
from rdbtools import RdbParser, MemoryCallback, PrintAllKeys, StatsAggregator
Expand Down
151 changes: 151 additions & 0 deletions rdbtools/cli/redis_stat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/usr/bin/env python
import os
import sys

PWD = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(PWD, '../../'))

import collections

from string import Template
from optparse import OptionParser
from rdbtools import RdbParser, MemoryCallback, PrintAllKeys, StatsAggregator


class Stat2D(object):
    """Two-dimensional counter table: row key -> column key -> int.

    Rows and columns are created on demand; missing cells read as 0, so
    callers can use ``stat[row][col] += n`` without any setup.
    ``rows()``/``to_text()`` render the table with a sorted header row.
    """

    def __init__(self, init=None):
        # Inner defaultdict(int) makes every cell default to 0.
        self.ddict = collections.defaultdict(lambda: collections.defaultdict(int))
        # Fix: ``init`` was previously accepted but silently ignored.
        # Seed the table from a mapping of mappings when provided.
        if init:
            for row_key, cols in init.items():
                for col_key, value in cols.items():
                    self.ddict[row_key][col_key] = value

    def __getitem__(self, key):
        return self.ddict[key]

    def __setitem__(self, key, value):
        self.ddict[key] = value

    def __delitem__(self, key):
        del self.ddict[key]

    def __contains__(self, key):
        return key in self.ddict

    def __len__(self):
        return len(self.ddict)

    def __repr__(self):
        return repr(self.ddict)

    def __iter__(self):
        return self.ddict.__iter__()

    def __reversed__(self):
        return self.ddict.__reversed__()

    def keys(self):
        return self.ddict.keys()

    def rows(self):
        """Return the table as a list of rows of strings.

        The first row is ``['-'] + sorted column names``; data rows follow,
        sorted by row key ('-' sorts before alphanumerics, so the header
        stays first after the final sort). Empty table -> ``[]``.
        """
        if not len(self.ddict):
            return []

        headers = [d.keys() for d in self.ddict.values()]
        headers = list(set([i for sublist in headers for i in sublist]))  # flatten and dedupe
        headers.sort()

        rows = []
        rows.append(['-'] + headers)
        for k in self.ddict:
            # defaultdict fills absent cells with 0 here.
            row = [str(self.ddict[k][h]) for h in headers]
            rows.append([k] + row)
        rows = sorted(rows)
        return rows

    def to_text(self, rows=None):
        """Render ``rows`` (default: ``self.rows()``) as a right-aligned,
        tab-separated table; return '-' for an empty table.

        Fix: padding is applied to a private copy, so a caller-supplied
        ``rows`` list is no longer mutated in place.
        """
        if not rows:
            rows = self.rows()
        if not rows:
            return '-'

        # Copy first (the original copied *after* mutating the input).
        rows = [list(r) for r in rows]

        def padding(s, width):
            return ' ' * (width - len(s)) + s

        # Right-align each column to its widest cell.
        for j in range(len(rows[0])):
            max_width = max([len(row[j]) for row in rows])
            for i in range(len(rows)):
                rows[i][j] = padding(rows[i][j], max_width)

        return '\n'.join(['\t'.join(row) for row in rows])

    def __str__(self):
        return self.to_text()

    def get_html(self):
        # Placeholder: HTML rendering is not implemented yet.
        pass


class MyStatsAggregator(object):
    """Aggregate MemoryCallback records into a Stat2D table, grouped by
    key prefix.

    Keys matching none of the configured prefixes land in the
    'z_unknown' row; every record is also folded into 'zz_all'.
    """

    def __init__(self, key_groupings=None):
        self.stat2d = Stat2D()
        # Fix: avoid the mutable default argument ([]) shared across
        # instances; fall back to a fresh empty list instead.
        self.key_groupings = key_groupings if key_groupings is not None else []

    def next_record(self, record):
        """Fold one record (key, type, bytes, ttl) into the table."""

        def get_ns(key):
            # First configured prefix that matches wins.
            for prefix in self.key_groupings:
                if key.startswith(prefix):
                    return prefix
            return 'z_unknown'

        ns = get_ns(str(record.key))

        for k in [ns, 'zz_all']:
            self.stat2d[k]['cnt_' + record.type] += 1
            self.stat2d[k]['mem_' + record.type] += record.bytes
            self.stat2d[k]['cnt'] += 1
            self.stat2d[k]['mem'] += record.bytes

            # ttl == -1 means "no expiry" (see MemoryCallback.parse_expiry).
            if record.ttl == -1:
                self.stat2d[k]['z_cnt_no_expire'] += 1
            else:
                self.stat2d[k]['z_sum_expire'] += record.ttl

    def to_text(self):
        """Render the aggregated table as tab-separated text."""
        return self.stat2d.to_text()

def main():
    """Command-line entry point.

    Parses an rdb dump file and prints per-key-prefix statistics
    (count, memory, expiry) as a tab-separated table.
    """
    usage = """usage: %prog [options] /path/to/dump.rdb

Example 1 : %prog -k "user-" -k "friends-" /var/redis/6379/dump.rdb
Example 2 : %prog /var/redis/6379/dump.rdb"""

    parser = OptionParser(usage=usage)

    parser.add_option("-k", "--key", dest="keys", action="append",
                      help="Keys that should be grouped together. Multiple prefix can be provided")

    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("Redis RDB file not specified")
    dump_file = args[0]
    if not options.keys:
        options.keys = []

    stat2d = MyStatsAggregator(options.keys)
    callback = MemoryCallback(stat2d, 64)
    parser = RdbParser(callback)
    try:
        parser.parse(dump_file)
    except Exception as e:
        # Fix: ``except Exception, e`` and the ``print`` statement are
        # Python-2-only syntax; this form works on Python 2.6+ and 3.
        # Best effort: report the error but still print whatever stats
        # were collected before the failure.
        print('error: %s' % e)

    print(stat2d.to_text())

if __name__ == '__main__':
    main()

39 changes: 27 additions & 12 deletions rdbtools/memprofiler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from collections import namedtuple
import random
import json
import datetime
import math


from rdbtools.parser import RdbCallback
from rdbtools.callbacks import encode_key
Expand All @@ -9,7 +12,7 @@
ZSKIPLIST_P=0.25
REDIS_SHARED_INTEGERS = 10000

MemoryRecord = namedtuple('MemoryRecord', ['database', 'type', 'key', 'bytes', 'encoding','size', 'len_largest_element'])
MemoryRecord = namedtuple('MemoryRecord', ['database', 'type', 'key', 'bytes', 'encoding','size', 'len_largest_element','ttl'])

class StatsAggregator():
def __init__(self, key_groupings = None):
Expand All @@ -27,6 +30,8 @@ def next_record(self, record):

self.add_histogram(record.type + "_length", record.size)
self.add_histogram(record.type + "_memory", (record.bytes/10) * 10)

self.add_histogram('TTL',record.ttl)

if record.type == 'list':
self.add_scatter('list_memory_by_length', record.bytes, record.size)
Expand Down Expand Up @@ -70,23 +75,25 @@ def get_json(self):
class PrintAllKeys():
def __init__(self, out):
self._out = out
self._out.write("%s,%s,%s,%s,%s,%s,%s\n" % ("database", "type", "key",
"size_in_bytes", "encoding", "num_elements", "len_largest_element"))
self._out.write("%s,%s,%s,%s,%s,%s,%s,%s\n" % ("database", "type", "key",
"size_in_bytes", "encoding", "num_elements", "len_largest_element","ttl"))

def next_record(self, record) :
self._out.write("%d,%s,%s,%d,%s,%d,%d\n" % (record.database, record.type, encode_key(record.key),
record.bytes, record.encoding, record.size, record.len_largest_element))
self._out.write("%d,%s,%s,%d,%s,%d,%d,%s\n" % (record.database, record.type, encode_key(record.key),
record.bytes, record.encoding, record.size, record.len_largest_element,record.ttl))

class MemoryCallback(RdbCallback):
'''Calculates the memory used if this rdb file were loaded into RAM
The memory usage is approximate, and based on heuristics.
'''
def __init__(self, stream, architecture):
self._current_time = datetime.datetime.now()
self._stream = stream
self._dbnum = 0
self._current_size = 0
self._current_encoding = None
self._current_length = 0
self._current_expire = -1
self._len_largest_element = 0

if architecture == 64 or architecture == '64':
Expand All @@ -113,13 +120,14 @@ def set(self, key, value, expiry, info):
size += self.key_expiry_overhead(expiry)

length = element_length(value)
record = MemoryRecord(self._dbnum, "string", key, size, self._current_encoding, length, length)
record = MemoryRecord(self._dbnum, "string", key, size, self._current_encoding, length, length, self.parse_expiry(expiry))
self._stream.next_record(record)
self.end_key()

def start_hash(self, key, length, expiry, info):
self._current_encoding = info['encoding']
self._current_length = length
self._current_length = length
self._current_expire = expiry
size = self.sizeof_string(key)
size += 2*self.robj_overhead()
size += self.top_level_object_overhead()
Expand All @@ -146,7 +154,7 @@ def hset(self, key, field, value):
self._current_size += 2*self.robj_overhead()

def end_hash(self, key):
record = MemoryRecord(self._dbnum, "hash", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
record = MemoryRecord(self._dbnum, "hash", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element,self.parse_expiry(self._current_expire))
self._stream.next_record(record)
self.end_key()

Expand All @@ -164,13 +172,14 @@ def sadd(self, key, member):
self._current_size += self.robj_overhead()

def end_set(self, key):
record = MemoryRecord(self._dbnum, "set", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
record = MemoryRecord(self._dbnum, "set", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element,self.parse_expiry(self._current_expire))
self._stream.next_record(record)
self.end_key()

def start_list(self, key, length, expiry, info):
self._current_length = length
self._current_encoding = info['encoding']
self._current_expire = expiry
size = self.sizeof_string(key)
size += 2*self.robj_overhead()
size += self.top_level_object_overhead()
Expand All @@ -194,13 +203,14 @@ def rpush(self, key, value) :
self._current_size += self.robj_overhead()

def end_list(self, key):
record = MemoryRecord(self._dbnum, "list", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
record = MemoryRecord(self._dbnum, "list", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element, self.parse_expiry(self._current_expire))
self._stream.next_record(record)
self.end_key()

def start_sorted_set(self, key, length, expiry, info):
self._current_length = length
self._current_encoding = info['encoding']
self._current_expire = expiry
size = self.sizeof_string(key)
size += 2*self.robj_overhead()
size += self.top_level_object_overhead()
Expand All @@ -225,14 +235,15 @@ def zadd(self, key, score, member):
self._current_size += self.skiplist_entry_overhead()

def end_sorted_set(self, key):
record = MemoryRecord(self._dbnum, "sortedset", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
record = MemoryRecord(self._dbnum, "sortedset", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element, self.parse_expiry(self._current_expire))
self._stream.next_record(record)
self.end_key()

def end_key(self):
self._current_encoding = None
self._current_size = 0
self._len_largest_element = 0
self._current_expire = None

def sizeof_string(self, string):
# See struct sdshdr over here https://github.com/antirez/redis/blob/unstable/src/sds.h
Expand Down Expand Up @@ -327,7 +338,11 @@ def zset_random_level(self):
return level
else:
return ZSKIPLIST_MAXLEVEL

def parse_expiry(self,expiry):
    # Convert an absolute expiry datetime into a TTL in seconds relative
    # to self._current_time (captured when parsing started); -1 means
    # the key has no expiry.
    if not expiry:
        return -1
    else:
        # May be negative when the key already expired before the parse;
        # NOTE(review): math.ceil returns a float on Python 2, which
        # matches the README sample output (e.g. -81039445726.0).
        return math.ceil((expiry-self._current_time).total_seconds())

def element_length(element):
if isinstance(element, int):
Expand Down