Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXAPPS-805] Notebook execution failures in CI. #12068

Merged
merged 2 commits into from
Aug 10, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tests/nightly/straight_dope/test_notebooks_multi_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@
This file tests that the notebooks requiring multiple GPUs run without
warning or exception.
"""
import logging
import unittest
from straight_dope_test_utils import _test_notebook
from straight_dope_test_utils import _download_straight_dope_notebooks

class StraightDopeMultiGpuTests(unittest.TestCase):
@classmethod
def setUpClass(self):
logging.basicConfig(level=logging.INFO)
assert _download_straight_dope_notebooks()

# Chapter 7
Expand Down
3 changes: 2 additions & 1 deletion tests/nightly/straight_dope/test_notebooks_single_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
warning or exception.
"""
import glob
import logging
import re
import os
import unittest
Expand Down Expand Up @@ -51,9 +52,9 @@
class StraightDopeSingleGpuTests(unittest.TestCase):
@classmethod
def setUpClass(self):
logging.basicConfig(level=logging.INFO)
assert _download_straight_dope_notebooks()


def test_completeness(self):
"""
Make sure that every tutorial that isn't in the whitelist is considered for testing by this
Expand Down
23 changes: 18 additions & 5 deletions tests/utils/notebook_test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@

IPYTHON_VERSION = 4 # Pin to ipython version 4.
TIME_OUT = 10*60 # Maximum 10 mins/test. Reaching timeout causes test failure.
RETRIES = 10

try:
TimeoutError
except NameError:
# py2
TimeoutError = RuntimeError

def run_notebook(notebook, notebook_dir, kernel=None, no_cache=False, temp_dir='tmp_notebook'):
"""Run tutorial Jupyter notebook to catch any execution error.
Expand Down Expand Up @@ -72,15 +79,21 @@ def run_notebook(notebook, notebook_dir, kernel=None, no_cache=False, temp_dir='
os.makedirs(working_dir)
try:
notebook = nbformat.read(notebook_path + '.ipynb', as_version=IPYTHON_VERSION)
# Add a small delay to allow time for sockets to be freed. This is a
# stop-gap measure to work around the 1000 ms socket linger that is
# hard-coded in the kernel API code.
time.sleep(1.1)
if kernel is not None:
eprocessor = ExecutePreprocessor(timeout=TIME_OUT, kernel_name=kernel)
else:
eprocessor = ExecutePreprocessor(timeout=TIME_OUT)
nb, _ = eprocessor.preprocess(notebook, {'metadata': {'path': working_dir}})

# There is a low (< 1%) chance that starting a notebook executor will fail due to the kernel
# taking too long to start, or a port collision, etc.
for i in range(RETRIES):
try:
nb, _ = eprocessor.preprocess(notebook, {'metadata': {'path': working_dir}})
except (RuntimeError, TimeoutError) as rte:
logging.info("Error starting preprocessor: {}. Attempt {}/{}".format(str(rte), i+1, RETRIES))
time.sleep(1)
continue
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rethrow exception?

Also, what happens if there is a valid failure? Wouldn't this result in a lot of failures because all notebooks get called 10 times?

Copy link
Contributor

@marcoabreu marcoabreu Aug 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good if we could be more specific than just runtime error - maybe look for a specific message?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added a commit which scans for the error message we would like to retry on that encompasses the issues we're seeing. We won't need to scan for TimeoutErrors as before, just RuntimeErrors with one specific error message.

break
except Exception as err:
err_msg = str(err)
errors.append(err_msg)
Expand Down