From 7c10891737672a4a0a1da0bf830d2ad1943e76fd Mon Sep 17 00:00:00 2001
From: Geremy Condra
Date: Tue, 15 Apr 2014 11:18:41 -0700
Subject: [PATCH] verity: optimize for memory usage

Reading the entire image into memory and then blockifying it will
peak at using 2x the size of the image, and hold the size of the
image in memory for the entire computation. Instead, read up to 16MB
at a time and compute its block hashes before throwing it away and
continuing.

Change-Id: Ibbf1317616765a0dd8de4195b1046a75394c1815
---
 verity/build_verity_tree.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/verity/build_verity_tree.py b/verity/build_verity_tree.py
index 970d8c01..e580d3c0 100755
--- a/verity/build_verity_tree.py
+++ b/verity/build_verity_tree.py
@@ -23,14 +23,25 @@ def get_hash_image_size(data_image_size):
 
 def blockify(data):
     blocks = []
-    for i in range(0, len(data), BLOCK_SIZE):
+    data_len = len(data)
+    assert(data_len % BLOCK_SIZE == 0)
+    for i in range(0, data_len, BLOCK_SIZE):
         chunk = data[i:i+BLOCK_SIZE]
         blocks.append(chunk)
-    return blocks
+    for b in blocks:
+        yield b
 
-def read_blocks(image_path):
-    image = open(image_path, "rb").read()
-    return blockify(image)
+def read_blocks(image_path, read_size=16*1024*1024):
+    image = open(image_path, "rb")
+    total_read = 0
+    while True:
+        data = image.read(read_size)
+        if not data:
+            break
+        for block in blockify(data):
+            total_read += len(block)
+            yield block
+    assert(total_read == os.path.getsize(image_path))
 
 def hash_block(data, salt):
     hasher = hashlib.new(HASH_FUNCTION)
-- 
2.11.0