From 2fdcfb145d571debec33702af3524e8552cbba9c Mon Sep 17 00:00:00 2001 From: 3553x <3553x@tuta.io> Date: Fri, 14 Jul 2017 19:33:21 +0100 Subject: [PATCH] BUG: Improved thread safety for read_html() GH16928 --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/io/html.py | 4 ++-- pandas/tests/io/test_html.py | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 762107a2610907..16c7c5418667be 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -164,6 +164,8 @@ I/O - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) +- Bug in :func:`read_html` importcheck fails when run concurrently (:issue:`16928`) + Plotting ^^^^^^^^ - Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) diff --git a/pandas/io/html.py b/pandas/io/html.py index 2613f26ae5f523..a4acb26af52590 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -37,8 +37,6 @@ def _importers(): if _IMPORTS: return - _IMPORTS = True - global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB try: @@ -59,6 +57,8 @@ def _importers(): except ImportError: pass + _IMPORTS = True + ############# # READ HTML # diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 4ef265dcd5113e..647a952bc3ab88 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -931,3 +931,26 @@ def test_same_ordering(): dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) + + +class ErrorThread(threading.Thread): + def run(self): + try: + super(ErrorThread, self).run() + except Exception as e: + self.err = e + else: + self.err = None + + +@pytest.mark.slow +def test_importcheck_thread_safety(): + reload(pandas.io.html) + filename = os.path.join(DATA_PATH, 'valid_markup.html') + helper_thread1 = ErrorThread(target=read_html, args=(filename,)) + helper_thread2 = ErrorThread(target=read_html, args=(filename,)) + helper_thread1.start() + helper_thread2.start() + while(helper_thread1.is_alive() or helper_thread2.is_alive()): + pass + assert None is helper_thread1.err is helper_thread2.err