diff --git a/README.md b/README.md index 174f885d..859bcebb 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,14 @@ We reverse-engineered the storage protocol of WeChat messages, and provide this tool to decrypt and parse WeChat messages on a rooted android phone. It can also render the messages into self-contained html files including voice messages, images, emojis, videos, etc. +The tool was last verified to work with the latest version of WeChat on 2025/01/01. If the tool works for you, please take a moment to add your phone/OS to [the wiki](https://github.com/ppwwyyxx/wechat-dump/wiki). ## How to use: #### Dependencies: + adb and rooted android phone connected to a Linux/Mac OSX/Win10+Bash. - If the phone does not come with adb support, you can try download an app. -+ Python >= 3.6 ++ Python >= 3.8 + [sqlcipher](https://github.com/sqlcipher/sqlcipher) >= 4.1 + sox (command line tools) + Silk audio decoder (included; build it with `./third-party/compile_silk.sh`) @@ -26,7 +26,8 @@ If the tool works for you, please take a moment to add your phone/OS to [the wik 1. Pull database file and (for older wechat versions) avatar index: + Automatic: `./android-interact.sh db`. It may use an incorrect userid. + Manual: - + Figure out your `${userid}` by inspecting the contents of `/data/data/com.tencent.mm/MicroMsg` on the __root__ filesystem of the device. It should be a 32-character-long name consisting of hexadecimal digits. + + Figure out your `${userid}` by inspecting the contents of `/data/data/com.tencent.mm/MicroMsg` on the __root__ filesystem of the device. + It should be a 32-character-long name consisting of hexadecimal digits. + Get `/data/data/com.tencent.mm/MicroMsg/${userid}/EnMicroMsg.db` from the device. 2. 
Decrypt database file: + Automatic: `./decrypt-db.py decrypt --input EnMicroMsg.db` @@ -52,11 +53,12 @@ If the tool works for you, please take a moment to add your phone/OS to [the wik If the above decryption doesn't work, you can also try the [password cracker](https://github.com/chg-hou/EnMicroMsg.db-Password-Cracker) to brute-force the key. The encryption key is not very strong. -3. Copy the WeChat user resource directory `/mnt/sdcard/tencent/MicroMsg/${userid}/{avatar,emoji,image2,sfs,video,voice2}` from the phone to the `resource` directory: +3. Copy the WeChat user resource directory `/data/data/com.tencent.mm/MicroMsg/${userid}/{avatar,emoji,image2,sfs,video,voice2}` from the phone to the `resource` directory: + `./android-interact.sh res` + Change `RES_DIR` in the script if the location of these directories is different on your phone. - + This can take a while. Can be faster to first archive it with `tar` with or without compression, and then copy the archive, - `busybox tar` is recommended as the Android system's `tar` may choke on long paths. + For older versions of WeChat, the directory may be `/mnt/sdcard/tencent/MicroMsg/`. + + This can take a while. It can be faster to first archive it with `tar` with or without compression, and then copy the archive. + `busybox tar` is recommended as the Android system's `tar` may choke on long paths. + In the end, we need a `resource` directory with the following subdir: `avatar,emoji,image2,sfs,video,voice2`. 4. (Optional) Download the emoji cache from [here](https://github.com/ppwwyyxx/wechat-dump/releases/download/0.1/emoji.cache.tar.bz2) @@ -101,10 +103,10 @@ Screenshots of generated html: See [here](http://ppwwyyxx.com/static/wechat/example.html) for an example html. -### TODO List -+ Fix rare unhandled message types: > 10000 and < 0 -+ Better user experiences... see `grep 'TODO' wechat -R` - +### TODO List (help needed!) +* __IMPORTANT__ Some emojis and chat images are stored in a proprietary "wxgf" format. 
We don't yet know how to decode this format. +* Fix rare unhandled message types: > 10000 and < 0 +* Better user experiences... see `grep 'TODO' wechat -R` ### Donate! diff --git a/android-interact.sh b/android-interact.sh index b812541f..fb39501e 100755 --- a/android-interact.sh +++ b/android-interact.sh @@ -6,7 +6,8 @@ PROG_DIR=`dirname "$PROG_NAME"` cd "$PROG_DIR" # Please check that your path is the same, since this might be different among devices -RES_DIR="/mnt/sdcard/tencent/MicroMsg" +# RES_DIR="/mnt/sdcard/tencent/MicroMsg" # old version of wechat use this path. +RES_DIR="/data/data/com.tencent.mm" MM_DIR="/data/data/com.tencent.mm" echo "Starting rooted adb server..." diff --git a/decrypt-db.py b/decrypt-db.py index d283d71b..d5c53804 100755 --- a/decrypt-db.py +++ b/decrypt-db.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import os +import shlex import sys import re import struct @@ -21,11 +22,15 @@ MM_DIR = "/data/data/com.tencent.mm" +def adb_command(command): + return subproc_succ("adb shell su -c " + shlex.quote(command)) + + def get_uin(): candidates = [] try: uin = None - out = subproc_succ(f"adb shell cat {MM_DIR}/shared_prefs/system_config_prefs.xml") + out = adb_command(f"cat {MM_DIR}/shared_prefs/system_config_prefs.xml") for line in out.decode('utf-8').split("\n"): if "default_uin" in line: line = PyQuery(line) @@ -40,7 +45,7 @@ def get_uin(): try: uin = None - out = subproc_succ(f"adb shell cat {MM_DIR}/shared_prefs/com.tencent.mm_preferences.xml") + out = adb_command(f"cat {MM_DIR}/shared_prefs/com.tencent.mm_preferences.xml") for line in out.decode('utf-8').split("\n"): if "last_login_uin" in line: line = PyQuery(line) @@ -55,7 +60,7 @@ def get_uin(): try: uin = None - out = subproc_succ(f"adb shell cat {MM_DIR}/shared_prefs/auth_info_key_prefs.xml") + out = adb_command(f"cat {MM_DIR}/shared_prefs/auth_info_key_prefs.xml") for line in out.decode('utf-8').split("\n"): if "auth_uin" in line: line = PyQuery(line) @@ -69,7 +74,7 @@ def get_uin(): 
logger.info(f"found uin={uin} in auth_info_key_prefs.xml") try: - out = subproc_succ(f"adb shell cat {MM_DIR}/MicroMsg/systemInfo.cfg") + out = adb_command(f"cat {MM_DIR}/MicroMsg/systemInfo.cfg") uin = int(javaobj.loads(out).get(1, 0)) except: logger.warning("default uin not found in systemInfo.cfg") @@ -101,13 +106,13 @@ def get_int(self, offset=4): def get_utf16(self, offset=4): return (self.data[offset + 4: offset+4+self.get_int(offset) * 2]).decode('utf-16') - out = subproc_succ("adb shell service call iphonesubinfo 1") + out = adb_command(f"service call iphonesubinfo 1") imei = Parcel(out.strip()).get_utf16() logger.info(f"found imei={imei} from iphonesubinfo") candidates.append(imei) try: - out = subproc_succ(f"adb shell cat {MM_DIR}/MicroMsg/CompatibleInfo.cfg") + out = adb_command(f"cat {MM_DIR}/MicroMsg/CompatibleInfo.cfg") # https://gist.github.com/ChiChou/36556fd412a9e3216abecf06e084e4d9 jobj = javaobj.loads(out) imei = jobj[258] diff --git a/third-party/silk/Makefile b/third-party/silk/Makefile index f09f9cc2..8587c0f5 100644 --- a/third-party/silk/Makefile +++ b/third-party/silk/Makefile @@ -47,7 +47,7 @@ ifeq (yes,$(USE_NEON)) endif -CFLAGS += -Wall -enable-threads -O3 +CFLAGS += -Wall -O3 CFLAGS += $(call cppflags-from-defines,$(CDEFINES)) CFLAGS += $(call cppflags-from-defines,$(ADDED_DEFINES)) diff --git a/wechat/avatar.py b/wechat/avatar.py index dd9d0470..bd795e98 100644 --- a/wechat/avatar.py +++ b/wechat/avatar.py @@ -54,6 +54,11 @@ def get_avatar_from_avtdir(self, avtid): candidates = glob.glob(os.path.join(self.avt_dir, dir1, dir2, f"*{avtid}*")) candidates = sorted(set(candidates), key=_filename_priority, reverse=True) for cand in candidates: + if os.path.isdir(cand): + candidates.extend(os.path.join(cand, x) for x in os.listdir(cand)) + for cand in candidates: + if os.path.isdir(cand): + continue try: if cand.endswith(".bm"): return self.read_bm_file(cand) diff --git a/wechat/msg.py b/wechat/msg.py index e6cfbdcd..c2ad5471 100644 --- 
a/wechat/msg.py +++ b/wechat/msg.py @@ -14,6 +14,7 @@ TYPE_REDENVELOPE = 436207665 TYPE_MONEY_TRANSFER = 419430449 # 微信转账 TYPE_LOCATION_SHARING = -1879048186 +TYPE_REPLY = 822083633 # 回复的消息. TYPE_APP_MSG = 16777265 _KNOWN_TYPES = [eval(k) for k in dir() if k.startswith('TYPE_')] @@ -110,6 +111,11 @@ def msg_str(self): except: pass return "[Money Transfer]" + elif self.type == TYPE_REPLY: + pq = PyQuery(self.content_xml_ready) + msg = pq('title').text() + # TODO parse reply. + return msg else: # TODO replace smiley with text return self.content diff --git a/wechat/parser.py b/wechat/parser.py index 78c2b98a..361fd8ad 100644 --- a/wechat/parser.py +++ b/wechat/parser.py @@ -74,7 +74,10 @@ def _parse_msg(self): def _parse_userinfo(self): userinfo_q = self.cc.execute(""" SELECT id, value FROM userinfo """) userinfo = dict(userinfo_q) - self.username = userinfo[2] + self.username = userinfo.get(2, None) + if self.username is None: + logger.error("Cannot find username in userinfo table!") + self.username = input("Please enter your username:") logger.info("Your username is: {}".format(self.username)) def _parse_imginfo(self): diff --git a/wechat/res.py b/wechat/res.py index 6c2fc12b..0e0cc333 100644 --- a/wechat/res.py +++ b/wechat/res.py @@ -147,11 +147,18 @@ def get_jpg_b64(img_file): if not img_file: return None if not img_file.endswith('jpg') and \ - imghdr.what(img_file) != 'jpeg': - im = Image.open(open(img_file, 'rb')) + imghdr.what(img_file) != 'jpeg': + try: + im = Image.open(open(img_file, 'rb')) + except: + return None buf = io.BytesIO() im.convert('RGB').save(buf, 'JPEG', quality=JPEG_QUALITY) return base64.b64encode(buf.getvalue()).decode('ascii') + with open(img_file, 'rb') as f: + if f.read(4) == b'wxgf': + logger.warning(f"Don't know how to decode wxgf image {img_file}") + return None return get_file_b64(img_file) big_file = get_jpg_b64(big_file)