From 022354d2a44704ce359c85c628f5ee71bd257e90 Mon Sep 17 00:00:00 2001 From: Max Date: Sat, 2 Jan 2021 09:19:08 -0800 Subject: [PATCH] Python3 Support (#73) * (try to) Make it work with python 3 * replace print with print() * replace unichr() with chr() * replace xrange with range() * fix the imports also removed the content of __init__.py * fully working on python3 Co-authored-by: Nicolas CARPi --- __init__.py | 2 - main.py | 107 +++++++++++++++++------------------ recuperabit/fs/core_types.py | 6 +- recuperabit/fs/ntfs.py | 32 +++++------ recuperabit/fs/ntfs_fmt.py | 5 +- recuperabit/logic.py | 13 +++-- recuperabit/utils.py | 54 ++++++------------ 7 files changed, 98 insertions(+), 121 deletions(-) mode change 100755 => 100644 __init__.py diff --git a/__init__.py b/__init__.py old mode 100755 new mode 100644 index bf95bd2..e69de29 --- a/__init__.py +++ b/__init__.py @@ -1,2 +0,0 @@ -from . import * - diff --git a/main.py b/main.py index ada2732..0b94fc8 100755 --- a/main.py +++ b/main.py @@ -41,14 +41,14 @@ __email__ = "andrea.lazzarotto@gmail.com" -reload(sys) -sys.setdefaultencoding('utf-8') +#reload(sys) +#sys.setdefaultencoding('utf-8') """Wrapping sys.stdout into an instance of StreamWriter will allow writing unicode data with sys.stdout.write() and print. https://wiki.python.org/moin/PrintFails""" -sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout) -sys.stderr = codecs.getwriter(locale.getpreferredencoding())(sys.stderr) +#sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout) +#sys.stderr = codecs.getwriter(locale.getpreferredencoding())(sys.stderr) # classes of available scanners plugins = ( @@ -78,7 +78,7 @@ def list_parts(parts, shorthands, test): """List partitions corresponding to test.""" for i, part in shorthands: if test(parts[part]): - print 'Partition #' + str(i), '->', parts[part] + print('Partition #' + str(i), '->', parts[part]) def check_valid_part(num, parts, shorthands, rebuild=True): @@ -86,40 +86,40 @@ def check_valid_part(num, parts, shorthands, rebuild=True): try: i = int(num) except ValueError: - print 'Value is not valid!' + print('Value is not valid!') return None - if i in xrange(len(shorthands)): + if i in range(len(shorthands)): i, par = shorthands[i] part = parts[par] if rebuild and par not in rebuilt: - print 'Rebuilding partition...' + print('Rebuilding partition...') part.rebuild() rebuilt.add(par) - print 'Done' + print('Done') return part - print 'No partition with given ID!' + print('No partition with given ID!') return None def interpret(cmd, arguments, parts, shorthands, outdir): """Perform command required by user.""" if cmd == 'help': - print 'Available commands:' + print('Available commands:') for name, desc in commands: - print ' %s%s' % (name.ljust(28), desc) + print(' %s%s' % (name.ljust(28), desc)) elif cmd == 'tree': if len(arguments) != 1: - print 'Wrong number of parameters!' + print('Wrong number of parameters!') else: part = check_valid_part(arguments[0], parts, shorthands) if part is not None: - print '-'*10 - print utils.tree_folder(part.root) - print utils.tree_folder(part.lost) - print '-'*10 + print('-'*10) + print(utils.tree_folder(part.root)) + print(utils.tree_folder(part.lost)) + print('-'*10) elif cmd == 'bodyfile': if len(arguments) != 2: - print 'Wrong number of parameters!' + print('Wrong number of parameters!') else: part = check_valid_part(arguments[0], parts, shorthands) if part is not None: @@ -133,12 +133,12 @@ def interpret(cmd, arguments, parts, shorthands, outdir): try: with codecs.open(fname, 'w', encoding='utf8') as outfile: outfile.write('\n'.join(contents)) - print 'Saved body file to %s' % fname + print('Saved body file to %s' % fname) except IOError: - print 'Cannot open file %s for output!' % fname + print('Cannot open file %s for output!' % fname) elif cmd == 'csv': if len(arguments) != 2: - print 'Wrong number of parameters!' + print('Wrong number of parameters!') else: part = check_valid_part(arguments[0], parts, shorthands) if part is not None: @@ -149,12 +149,12 @@ def interpret(cmd, arguments, parts, shorthands, outdir): outfile.write( '\n'.join(contents) ) - print 'Saved CSV file to %s' % fname + print('Saved CSV file to %s' % fname) except IOError: - print 'Cannot open file %s for output!' % fname + print('Cannot open file %s for output!' % fname) elif cmd == 'tikzplot': if len(arguments) not in (1, 2): - print 'Wrong number of parameters!' + print('Wrong number of parameters!') else: part = check_valid_part(arguments[0], parts, shorthands) if part is not None: @@ -163,14 +163,14 @@ def interpret(cmd, arguments, parts, shorthands, outdir): try: with codecs.open(fname, 'w') as outfile: outfile.write(utils.tikz_part(part) + '\n') - print 'Saved Tikz code to %s' % fname + print('Saved Tikz code to %s' % fname) except IOError: - print 'Cannot open file %s for output!' % fname + print('Cannot open file %s for output!' % fname) else: - print utils.tikz_part(part) + print(utils.tikz_part(part)) elif cmd == 'restore': if len(arguments) != 2: - print 'Wrong number of parameters!' + print('Wrong number of parameters!') else: partid = arguments[0] part = check_valid_part(partid, parts, shorthands) @@ -185,12 +185,12 @@ def interpret(cmd, arguments, parts, shorthands, outdir): for i in [index, indexi]: myfile = part.get(i, myfile) if myfile is None: - print 'The index is not valid' + print('The index is not valid') else: logic.recursive_restore(myfile, part, partition_dir) elif cmd == 'locate': if len(arguments) != 2: - print 'Wrong number of parameters!' + print('Wrong number of parameters!') else: part = check_valid_part(arguments[0], parts, shorthands) if part is not None: @@ -201,10 +201,10 @@ def interpret(cmd, arguments, parts, shorthands, outdir): ' [GHOST]' if node.is_ghost else ' [DELETED]' if node.is_deleted else '' ) - print "[%s]: %s%s" % (node.index, path, desc) + print('[%s]: %s%s' % (node.index, path, desc)) elif cmd == 'traceback': if len(arguments) != 2: - print 'Wrong number of parameters!' + print('Wrong number of parameters!') else: partid = arguments[0] part = check_valid_part(partid, parts, shorthands) @@ -218,23 +218,23 @@ def interpret(cmd, arguments, parts, shorthands, outdir): for i in [index, indexi]: myfile = part.get(i, myfile) if myfile is None: - print 'The index is not valid' + print('The index is not valid') else: while myfile is not None: - print "[{}] {}".format(myfile.index, myfile.full_path(part)) + print('[{}] {}'.format(myfile.index, myfile.full_path(part))) myfile = part.get(myfile.parent) elif cmd == 'merge': if len(arguments) != 2: - print 'Wrong number of parameters!' + print('Wrong number of parameters!') else: part1 = check_valid_part(arguments[0], parts, shorthands, rebuild=False) part2 = check_valid_part(arguments[1], parts, shorthands, rebuild=False) if None in (part1, part2): return if part1.fs_type != part2.fs_type: - print 'Cannot merge partitions with types (%s, %s)' % (part1.fs_type, part2.fs_type) + print('Cannot merge partitions with types (%s, %s)' % (part1.fs_type, part2.fs_type)) return - print 'Merging partitions...' + print('Merging partitions...') utils.merge(part1, part2) source_position = int(arguments[1]) destination_position = int(arguments[0]) @@ -247,7 +247,7 @@ def interpret(cmd, arguments, parts, shorthands, outdir): rebuilt.remove(par) except: pass - print 'There are now %d partitions.' % (len(parts), ) + print('There are now %d partitions.' % (len(parts), )) elif cmd == 'recoverable': list_parts(parts, shorthands, lambda x: x.recoverable) elif cmd == 'other': @@ -257,21 +257,21 @@ def interpret(cmd, arguments, parts, shorthands, outdir): elif cmd == 'quit': exit(0) else: - print 'Unknown command.' + print('Unknown command.') def main(): """Wrap the program logic inside a function.""" logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) - print " ___ ___ _ _ " - print " | _ \___ __ _ _ _ __ ___ _ _ __ _| _ |_) |_ " - print " | / -_) _| || | '_ \/ -_) '_/ _` | _ \ | _|" - print " |_|_\___\__|\_,_| .__/\___|_| \__,_|___/_|\__|" - print " |_| v{}".format(__version__) - print ' ', __copyright__, '<%s>' % __email__ - print ' Released under the', __license__ - print '' + print(" ___ ___ _ _ ") + print(" | _ \___ __ _ _ _ __ ___ _ _ __ _| _ |_) |_ ") + print(" | / -_) _| || | '_ \/ -_) '_/ _` | _ \ | _|") + print(" |_|_\___\__|\_,_| .__/\___|_| \__,_|___/_|\__|") + print(" |_| v{}".format(__version__)) + print(' ', __copyright__, '<%s>' % __email__) + print(' Released under the', __license__) + print('') parser = argparse.ArgumentParser( description='Reconstruct the directory structure of possibly damaged ' @@ -335,10 +335,10 @@ def main(): # Ask for confirmation before beginning the process try: - confirm = raw_input('Type [Enter] to start the analysis or ' + confirm = input('Type [Enter] to start the analysis or ' '"exit" / "quit" / "q" to quit: ') except EOFError: - print '' + print('') exit(0) if confirm in ('exit', 'quit', 'q'): exit(0) @@ -369,12 +369,11 @@ def main(): logging.info('%i partitions found.', len(parts)) while True: - print '\nWrite command ("help" for details):' - print '>', + print('\nWrite command ("help" for details):') try: - command = raw_input().strip().split(' ') - except EOFError: - print '' + command = input('> ').split(' ') + except (EOFError, KeyboardInterrupt): + print('') exit(0) cmd = command[0] arguments = command[1:] diff --git a/recuperabit/fs/core_types.py b/recuperabit/fs/core_types.py index 69c352e..41345bd 100644 --- a/recuperabit/fs/core_types.py +++ b/recuperabit/fs/core_types.py @@ -26,7 +26,7 @@ import logging import os.path -from constants import sector_size +from .constants import sector_size from ..utils import readable_bytes @@ -94,9 +94,9 @@ def full_path(self, part): """Return the full path of this file.""" if self.parent is not None: parent = part[self.parent] - return os.path.join(parent.full_path(part), unicode(self.name)) + return os.path.join(parent.full_path(part), self.name) else: - return unicode(self.name) + return self.name def get_content(self, partition): # pylint: disable=W0613 diff --git a/recuperabit/fs/ntfs.py b/recuperabit/fs/ntfs.py index 9336c8e..9ce5c44 100644 --- a/recuperabit/fs/ntfs.py +++ b/recuperabit/fs/ntfs.py @@ -25,9 +25,9 @@ import logging from collections import Counter -from constants import max_sectors, sector_size -from core_types import DiskScanner, File, Partition -from ntfs_fmt import (attr_header_fmt, attr_names, attr_nonresident_fmt, +from .constants import max_sectors, sector_size +from .core_types import DiskScanner, File, Partition +from .ntfs_fmt import (attr_header_fmt, attr_names, attr_nonresident_fmt, attr_resident_fmt, attr_types_fmt, attribute_list_parser, boot_sector_fmt, entry_fmt, indx_dir_entry_fmt, indx_fmt, indx_header_fmt) @@ -97,7 +97,7 @@ def parse_mft_attr(attr): def _apply_fixup_values(header, entry): """Apply the fixup values to FILE and INDX records.""" offset = header['off_fixup'] - for i in xrange(1, header['n_entries']): + for i in range(1, header['n_entries']): pos = sector_size * i entry[pos-2:pos] = entry[offset + 2*i:offset + 2*(i+1)] @@ -267,7 +267,7 @@ def __init__(self, parsed, offset, is_ghost=False, ads=''): index = parsed['record_n'] ads_suffix = ':' + ads if ads != '' else ads if ads != '': - index = unicode(index) + ads_suffix + index = str(index) + ads_suffix attrs = parsed['attributes'] filenames = attrs['$FILE_NAME'] datas = attrs.get('$DATA', []) @@ -282,7 +282,7 @@ def __init__(self, parsed, offset, is_ghost=False, ads=''): break filtered = [ - f for f in filenames if f.has_key('content') and + f for f in filenames if 'content' in f and f['content'] is not None and f['content']['name_length'] > 0 and f['content']['name'] is not None @@ -368,7 +368,7 @@ def content_iterator(self, partition, image, datas): partial = self._padded_bytes(image, position, amount) length -= amount offset += amount - yield str(partial) + yield bytes(partial) vcn = attr['end_VCN'] + 1 def get_content(self, partition): @@ -415,7 +415,7 @@ def get_content(self, partition): start = single['dump_offset'] + single['content_off'] end = start + single['content_size'] content = dump[start:end] - return str(content) + return bytes(content) else: if partition.sec_per_clus is None: logging.error(u'Cannot restore non-resident $DATA ' @@ -472,17 +472,17 @@ def __init__(self, pointer): def feed(self, index, sector): """Feed a new sector.""" # check boot sector - if sector.endswith('\x55\xAA') and 'NTFS' in sector[:8]: + if sector.endswith(b'\x55\xAA') and b'NTFS' in sector[:8]: self.found_boot.append(index) return 'NTFS boot sector' # check file record - if sector.startswith(('FILE', 'BAAD')): + if sector.startswith((b'FILE', b'BAAD')): self.found_file.add(index) return 'NTFS file record' # check index record - if sector.startswith('INDX'): + if sector.startswith(b'INDX'): self.found_indx.add(index) return 'NTFS index record' @@ -518,14 +518,14 @@ def most_likely_sec_per_clus(self): to speed up the search.""" counter = Counter() counter.update(self.found_spc) - counter.update(2**i for i in xrange(8)) + counter.update(2**i for i in range(8)) return [i for i, _ in counter.most_common()] def find_boundary(self, part, mft_address, multipliers): """Determine the starting sector of a partition with INDX records.""" nodes = ( self.parsed_file_review[node.offset] - for node in part.files.itervalues() + for node in iter(part.files.values()) if node.offset in self.parsed_file_review and '$INDEX_ALLOCATION' in self.parsed_file_review[node.offset]['attributes'] @@ -643,7 +643,7 @@ def add_from_mft_mirror(self, part): if mirrpos is None: return - for i in xrange(4): + for i in range(4): node = part.get(i) if node is None or node.is_ghost: position = mirrpos + i * FILE_size @@ -665,7 +665,7 @@ def finalize_reconstruction(self, part): logging.info('Adding extra attributes from $ATTRIBUTE_LIST') # Select elements with many attributes many_attributes_it = ( - node for node in list(part.files.itervalues()) + node for node in part.files.values() if node.offset in self.parsed_file_review and '$ATTRIBUTE_LIST' in self.parsed_file_review[node.offset]['attributes'] @@ -677,7 +677,7 @@ def finalize_reconstruction(self, part): logging.info('Adding ghost entries from $INDEX_ALLOCATION') # Select only elements with $INDEX_ALLOCATION allocation_it = ( - node for node in list(part.files.itervalues()) + node for node in part.files.values() if node.offset in self.parsed_file_review and '$INDEX_ALLOCATION' in self.parsed_file_review[node.offset]['attributes'] diff --git a/recuperabit/fs/ntfs_fmt.py b/recuperabit/fs/ntfs_fmt.py index 3f3f7ec..97172a5 100644 --- a/recuperabit/fs/ntfs_fmt.py +++ b/recuperabit/fs/ntfs_fmt.py @@ -27,7 +27,7 @@ def printable_name(name): """Return a printable name decoded in UTF-16.""" decoded = [] - parts = (name[i:i+2] for i in xrange(0, len(name), 2)) + parts = (name[i:i+2] for i in range(0, len(name), 2)) for part in parts: try: decoded.append(part.decode('utf-16')) @@ -43,8 +43,7 @@ def printable_name(name): def windows_time(timestamp): """Convert a date-time value from Microsoft filetime to UTC.""" try: - encoded = str(timestamp[::-1]).encode('hex') - value = int(encoded, 16) # 'i' in unpack + value = int.from_bytes(timestamp, byteorder='little', signed=False) converted = datetime.utcfromtimestamp(value/10.**7 - 11644473600) return converted except ValueError: diff --git a/recuperabit/logic.py b/recuperabit/logic.py index 1387f9a..8a8fd4c 100644 --- a/recuperabit/logic.py +++ b/recuperabit/logic.py @@ -25,8 +25,9 @@ import os import os.path import sys +import types -from utils import tiny_repr +from .utils import tiny_repr class SparseList(object): @@ -36,7 +37,7 @@ def __init__(self, data=None, default=None): self.elements = {} self.default = default if data is not None: - self.keys = sorted(data.iterkeys()) + self.keys = sorted(iter(data)) self.elements.update(data) def __len__(self): @@ -59,7 +60,7 @@ def __setitem__(self, index, item): self.elements[index] = item def __contains__(self, element): - return element in self.elements.itervalues() + return element in self.elements.values() def __iter__(self): return self.keys.__iter__() @@ -71,7 +72,7 @@ def __repr__(self): k = self.keys[0] elems.append(str(k) + ' -> ' + tiny_repr(self.elements[k])) prevk = self.keys[0] - for i in xrange(1, len(self.elements)): + for i in range(1, len(self.elements)): nextk = self.keys[i] if nextk <= prevk + 2: while prevk < nextk - 1: @@ -205,7 +206,7 @@ def makedirs(path): def recursive_restore(node, part, outputdir, make_dirs=True): """Restore a directory structure starting from a file node.""" - parent_path = unicode( + parent_path = str( part[node.parent].full_path(part) if node.parent is not None else '' ) @@ -240,7 +241,7 @@ def recursive_restore(node, part, outputdir, make_dirs=True): if content is not None: logging.info(u'Restoring #%s %s', node.index, file_path) with codecs.open(restore_path, 'wb') as outfile: - if hasattr(content, '__iter__'): + if isinstance(content, types.GeneratorType): for piece in content: outfile.write(piece) else: diff --git a/recuperabit/utils.py b/recuperabit/utils.py index 584d7db..64ef84b 100644 --- a/recuperabit/utils.py +++ b/recuperabit/utils.py @@ -26,10 +26,10 @@ import time import unicodedata -from fs.constants import sector_size +from .fs.constants import sector_size printer = pprint.PrettyPrinter(indent=4) -all_chars = (unichr(i) for i in xrange(sys.maxunicode)) +all_chars = (chr(i) for i in range(sys.maxunicode)) unicode_printable = set( c for c in all_chars if not unicodedata.category(c)[0].startswith('C') @@ -60,18 +60,6 @@ def sectors(image, offset, size, bsize=sector_size, fill=True): return None return bytearray(dump) - -def signedbytes(data): - """Convert a bytearray into an integer, considering the first bit as - sign. The data must be Big-endian.""" - if data[0] & 0x80: - inverted = bytearray(~d % 256 for d in data) - return -signedbytes(inverted) - 1 - - encoded = str(data).encode('hex') - return int(encoded, 16) - - def unixtime(dtime): """Convert datetime to UNIX epoch.""" if dtime is None: @@ -82,6 +70,8 @@ def unixtime(dtime): return 0 +# format: +# [(label, (formatter, lower, higher)), ...] def unpack(data, fmt): """Extract formatted information from a string of bytes.""" result = {} @@ -105,14 +95,18 @@ def unpack(data, fmt): if formatter.endswith('i') and len(formatter) < 4: # Use little-endian by default. Big-endian with >i. # Force sign-extension of first bit with >+i / +i. - step = 1 if formatter.startswith('>') else -1 chunk = data[low:high+1] + + signed = False + if '+' in formatter: + signed = True + + byteorder = 'little' + if formatter.startswith('>'): + byteorder = 'big' + if len(chunk): - if '+' in formatter: - result[label] = signedbytes(chunk[::step]) - else: - encoded = str(chunk[::step]).encode('hex') - result[label] = int(encoded, 16) + result[label] = int.from_bytes(chunk, byteorder=byteorder, signed=signed) else: result[label] = None return result @@ -140,20 +134,6 @@ def printable(text, default='.', alphabet=None): alphabet = unicode_printable return ''.join((i if i in alphabet else default) for i in text) - -def hexdump(stream, count=16): - """Return a nice hexadecimal dump representation of stream.""" - stream = str(stream) - encoded = stream.encode('hex') - chunks = [encoded[i:i+2] for i in xrange(0, len(encoded), 2)] - lines = ( - u'%08d: ' % i + ' '.join(chunks[i:i+count]) + ' | ' + - printable(stream[i:i+count], alphabet=ascii_printable) - for i in xrange(0, len(chunks), count) - ) - return '\n'.join(lines) - - def pretty(dictionary): """Format dictionary with the pretty printer.""" return printer.pformat(dictionary) @@ -165,7 +145,7 @@ def show(dictionary): def tiny_repr(element): - """Return a representation of unicode strings without the u.""" + """deprecated: Return a representation of unicode strings without the u.""" rep = repr(element) return rep[1:] if type(element) == unicode else rep @@ -230,7 +210,7 @@ def tree_folder(directory, padding=0): def _bodyfile_repr(node, path): """Return a body file line for node.""" end = '/' if node.is_directory or len(node.children) else '' - return '|'.join(unicode(el) for el in [ + return '|'.join(str(el) for el in [ '0', # MD5 path + node.name + end, # name node.index, # inode @@ -285,7 +265,7 @@ def tikz_child(directory, padding=0): lines.append(content) count += number lines.append('}') - for entry in xrange(count): + for entry in range(count): lines.append('child [missing] {}') return '\n'.join(lines).replace('\n}', '}'), count