From d39eeb142166c6eae1c0732ebc481f4e9e31fe8f Mon Sep 17 00:00:00 2001 From: Omer Belhasin Date: Sun, 25 Nov 2018 14:20:55 +0200 Subject: [PATCH 1/2] add functional test script --- test/test.py | 291 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 test/test.py diff --git a/test/test.py b/test/test.py new file mode 100644 index 000000000..245658a2f --- /dev/null +++ b/test/test.py @@ -0,0 +1,291 @@ +import sys +import yaml +import os +import unittest +import ibm_boto3 +from ibm_botocore.client import Config +from ibm_botocore.client import ClientError +import pywren_ibm_cloud as pywren +import urllib.request + +PREFIX = '__pywren.test' + +# Dataset from: https://archive.ics.uci.edu/ml/datasets/bag+of+words +TEST_FILES_URLS = ['http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.enron.txt', + 'http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.kos.txt', + 'http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.nips.txt', + 'http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.nytimes.txt', + 'http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.pubmed.txt'] + +try: + config_path = os.path.join(os.path.expanduser("~/.pywren_config")) + with open(config_path, 'r') as config_file: + CONFIG = yaml.safe_load(config_file) +except: + print("can't open config file") + sys.exit() + +def initCos(): + return ibm_boto3.resource("s3", + ibm_api_key_id=CONFIG['ibm_cos']['api_key'], + ibm_auth_endpoint='https://iam.ng.bluemix.net/oidc/token', + config=Config(signature_version="oauth"), + endpoint_url=CONFIG['ibm_cos']['endpoint'] + ) + + +def putFileToCOS(cos, bucket_name, key, bytes): + try: + cos.Object(bucket_name, key).put(Body=bytes) + print("Upload file: {} - SUCCESS".format(key)) + except ClientError as be: + print("CLIENT ERROR: {0}\n".format(be)) + except Exception as e: + print("Unable to create bucket: {0}".format(e)) + + +def getFilenamesFromCOS(cos, bucket_name, prefix): + print("Retrieving items' names from bucket: {0}, prefix: {1}".format(bucket_name, prefix)) + result = [] + try: + for data in cos.Bucket(bucket_name).objects.filter(Prefix=prefix): + result.append(data.key) + except ClientError as be: + print("CLIENT ERROR: {0}\n".format(be)) + except Exception as e: + print("Unable to delete item: {0}".format(e)) + return result + + +def getFileFromCOS(cos, bucket_name, key): + print("Retrieving item from bucket: {0}, key: {1}".format(bucket_name, key)) + try: + file = cos.Object(bucket_name, key).get() + return file["Body"].read() + except ClientError as be: + print("CLIENT ERROR: {0}\n".format(be)) + except Exception as e: + print("Unable to retrieve file contents: {0}".format(e)) + + +def deleteFileFromCOS(cos, bucket_name, key): + try: + cos.Object(bucket_name, key).delete() + print("File: {0} deleted!".format(key)) + except ClientError as be: + print("CLIENT ERROR: {0}\n".format(be)) + except Exception as e: + print("Unable to delete item: {0}".format(e)) + + +class TestPywren(unittest.TestCase): + + def hello_world(self, param): + return "Hello World!" + + def simple_map_function(self, x, y): + return x + y + + def simple_reduce_function(self, results): + total = 0 + for map_result in results: + total = total + map_result + return total + + def test_call_async(self): + pw = pywren.ibm_cf_executor() + pw.call_async(self.hello_world, "") + result = pw.get_result() + self.assertEqual(result, "Hello World!") + + pw = pywren.ibm_cf_executor() + pw.call_async(self.simple_map_function, [4, 6]) + result = pw.get_result() + self.assertEqual(result, 10) + + pw = pywren.ibm_cf_executor() + pw.call_async(self.simple_map_function, {'x': 2, 'y': 8}) + result = pw.get_result() + self.assertEqual(result, 10) + + def test_map(self): + iterdata = [[1, 1], [2, 2], [3, 3], [4, 4]] + pw = pywren.ibm_cf_executor() + pw.map(self.simple_map_function, iterdata) + result = pw.get_result() + self.assertEqual(result, [2, 4, 6, 8]) + + def test_map_reduce(self): + iterdata = [[1, 1], [2, 2], [3, 3], [4, 4]] + pw = pywren.ibm_cf_executor() + pw.map_reduce(self.simple_map_function, iterdata, self.simple_reduce_function) + result = pw.get_result() + self.assertEqual(result, 20) + + +def initTests(): + print('Uploading test files...') + + cos = initCos() + result_to_compare = 1 # including result's word + i = 0 + for url in TEST_FILES_URLS: + content = urllib.request.urlopen(url).read() + putFileToCOS(cos, CONFIG['pywren']['storage_bucket'], PREFIX + '/test' + str(i), content) + result_to_compare += len(content.split()) + i += 1 + + putFileToCOS(cos, CONFIG['pywren']['storage_bucket'], PREFIX + '/result', str(result_to_compare).encode()) + + print("ALL DONE") + + +def cleanTests(): + print('Deleting test files...') + + cos = initCos() + for key in getFilenamesFromCOS(cos, CONFIG['pywren']['storage_bucket'], PREFIX): + deleteFileFromCOS(cos, CONFIG['pywren']['storage_bucket'], key) + + print("ALL DONE") + + +class TestPywrenCos(unittest.TestCase): + + def my_map_function_bucket(self, bucket, key, data_stream): + print('I am processing the object {}'.format(key)) + counter = {} + + data = data_stream.read() + + for line in data.splitlines(): + for word in line.decode('utf-8').split(): + if word not in counter: + counter[word] = 1 + else: + counter[word] += 1 + + return counter + + def my_map_function_key(self, key, data_stream): + print('I am processing the object {}'.format(key)) + counter = {} + + data = data_stream.read() + + for line in data.splitlines(): + for word in line.decode('utf-8').split(): + if word not in counter: + counter[word] = 1 + else: + counter[word] += 1 + + return counter + + def my_map_function_url(self, url, data_stream): + print('I am processing the object from {}'.format(url)) + counter = {} + + data = data_stream.read() + + for line in data.splitlines(): + for word in line.decode('utf-8').split(): + if word not in counter: + counter[word] = 1 + else: + counter[word] += 1 + + return counter + + def my_reduce_function(self, results): + final_result = 0 + + for count in results: + for word in count: + final_result += count[word] + + return final_result + + def checkResult(self, cos, result): + result_to_compare = getFileFromCOS(cos, CONFIG['pywren']['storage_bucket'], PREFIX + '/result') + + if isinstance(result, list): + total = 0 + for r in result: + total += r + else: + total = result + + self.assertEqual(total, int(result_to_compare)) + + def test_map_reduce_cos_bucket(self): + data_prefix = CONFIG['pywren']['storage_bucket'] + '/' + PREFIX + chunk_size = 4 * 1024 ** 2 # 4MB + pw = pywren.ibm_cf_executor() + pw.map_reduce(self.my_map_function_bucket, data_prefix, self.my_reduce_function, chunk_size) + result = pw.get_result() + self.checkResult(initCos(), result) + + def test_map_reduce_cos_bucket_one_reducer_per_object(self): + data_prefix = CONFIG['pywren']['storage_bucket'] + '/' + PREFIX + chunk_size = 4 * 1024 ** 2 # 4MB + pw = pywren.ibm_cf_executor() + pw.map_reduce(self.my_map_function_bucket, data_prefix, self.my_reduce_function, chunk_size, + reducer_one_per_object=True) + result = pw.get_result() + self.checkResult(initCos(), result) + + def test_map_reduce_cos_key(self): + cos = initCos() + bucket_name = CONFIG['pywren']['storage_bucket'] + iterdata = [bucket_name + '/' + key for key in getFilenamesFromCOS(cos, bucket_name, PREFIX)] + chunk_size = 4 * 1024 ** 2 # 4MB + pw = pywren.ibm_cf_executor() + pw.map_reduce(self.my_map_function_key, iterdata, self.my_reduce_function, chunk_size) + result = pw.get_result() + self.checkResult(cos, result) + + def test_map_reduce_cos_key_one_reducer_per_object(self): + cos = initCos() + bucket_name = CONFIG['pywren']['storage_bucket'] + iterdata = [bucket_name + '/' + key for key in getFilenamesFromCOS(cos, bucket_name, PREFIX)] + chunk_size = 4 * 1024 ** 2 # 4MB + pw = pywren.ibm_cf_executor() + pw.map_reduce(self.my_map_function_key, iterdata, self.my_reduce_function, chunk_size, + reducer_one_per_object=True) + result = pw.get_result() + self.checkResult(cos, result) + + def test_map_reduce_url(self): + chunk_size = 4 * 1024 ** 2 # 4MB + pw = pywren.ibm_cf_executor() + pw.map_reduce(self.my_map_function_url, TEST_FILES_URLS, self.my_reduce_function, chunk_size) + result = pw.get_result() + self.checkResult(initCos(), result + 1) + + +if __name__ == '__main__': + if len(sys.argv) <= 1: + task = 'full' + else: + task = sys.argv[1] + + if task == 'init': + initTests() + elif task == 'clean': + cleanTests() + else: + suite = unittest.TestSuite() + if task == 'pywren': + suite.addTest(unittest.makeSuite(TestPywren)) + elif task == 'pywren_cos': + suite.addTest(unittest.makeSuite(TestPywrenCos)) + elif task == 'full': + suite.addTest(unittest.makeSuite(TestPywren)) + suite.addTest(unittest.makeSuite(TestPywrenCos)) + else: + print('Unknown Command... use: "init", "pywren", "pywren_cos" or "clean".') + sys.exit() + + runner = unittest.TextTestRunner() + runner.run(suite) From f942dffc9a9fa610bb76a33ae2f8974f60978f54 Mon Sep 17 00:00:00 2001 From: Omer Belhasin Date: Sun, 25 Nov 2018 16:15:02 +0200 Subject: [PATCH 2/2] doc update and data input management improved --- README.md | 22 ++++++++++++++++++++-- test/data | 5 +++++ test/{test.py => testpywren.py} | 12 ++++++------ 3 files changed, 31 insertions(+), 8 deletions(-) create mode 100644 test/data rename test/{test.py => testpywren.py} (93%) diff --git a/README.md b/README.md index 099b81c31..933c31bf2 100644 --- a/README.md +++ b/README.md @@ -155,9 +155,27 @@ pw = pywren.ibm_cf_executor(config=config) ### Verify -To test that all is working, run the [pywrentest](examples/pywrentest.py) located in the `examples` folder. From the project root folder, execute +To test that all is working, run the [testpywren.py](test/testpywren.py) located in the `test` folder with the arguments listed below. - python3 examples/pywrentest.py +For initializing test files using IBM Cloud Object Storage service, execute once from the project root folder: + + python3 test/testpywren.py init + +and then execute: + +| Command | Explanation | +|---| ---| +| `python3 test/testpywren.py` | test all PyWren's functionality | +| `python3 test/testpywren.py pywren` | test PyWren without Cloud Object Storage service | +| `python3 test/testpywren.py pywren_cos` | test PyWren using Cloud Object Storage service only | + +To clean test files stored in Cloud Object Storage service, execute: + + python3 test/testpywren.py clean + +_NOTE:_ The test script assumes that a local PyWren's config file was set correctly. + +To edit tests' data, open the [data](test/data) file located in the `test` folder and simply add or remove text URL files. ## How to use PyWren for IBM Cloud Functions diff --git a/test/data b/test/data new file mode 100644 index 000000000..f486eb8ca --- /dev/null +++ b/test/data @@ -0,0 +1,5 @@ +http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.enron.txt +http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.kos.txt +http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.nips.txt +http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.nytimes.txt +http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.pubmed.txt \ No newline at end of file diff --git a/test/test.py b/test/testpywren.py similarity index 93% rename from test/test.py rename to test/testpywren.py index 245658a2f..d9aeb6115 100644 --- a/test/test.py +++ b/test/testpywren.py @@ -10,12 +10,12 @@ PREFIX = '__pywren.test' -# Dataset from: https://archive.ics.uci.edu/ml/datasets/bag+of+words -TEST_FILES_URLS = ['http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.enron.txt', - 'http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.kos.txt', - 'http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.nips.txt', - 'http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.nytimes.txt', - 'http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.pubmed.txt'] +try: + with open('data', 'r') as data_file: + TEST_FILES_URLS = [url for url in data_file.read().split()] +except: + print("can't open data file") + sys.exit() try: config_path = os.path.join(os.path.expanduser("~/.pywren_config"))