From 9d5e040ee42f1a22cafa76d3d928224a9e530070 Mon Sep 17 00:00:00 2001 From: 3553x <3553x@tuta.io> Date: Fri, 14 Jul 2017 19:33:21 +0100 Subject: [PATCH] BUG: Improved thread safety for read_html() GH16928 --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/io/html.py | 4 ++-- pandas/tests/io/test_html.py | 36 ++++++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 762107a2610907..688f1b2190544f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -164,6 +164,8 @@ I/O - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) +- Bug in :func:`read_html` where import check fails when run concurrently (:issue:`16928`) + Plotting ^^^^^^^^ - Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) diff --git a/pandas/io/html.py b/pandas/io/html.py index 2613f26ae5f523..a4acb26af52590 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -37,8 +37,6 @@ def _importers(): if _IMPORTS: return - _IMPORTS = True - global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB try: @@ -59,6 +57,8 @@ def _importers(): except ImportError: pass + _IMPORTS = True + ############# # READ HTML # diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 4ef265dcd5113e..0455ffb0693228 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -3,13 +3,17 @@ import glob import os import re +import threading import warnings + +# imports needed for Python 3.x but will fail under Python 2.x try: - from importlib import import_module + from importlib import import_module, reload except ImportError: import_module = __import__ + from distutils.version import LooseVersion import pytest @@ -22,6 +26,7 @@ from pandas.compat import (map, zip, StringIO, string_types, BytesIO, is_platform_windows, PY3) from pandas.io.common 
import URLError, urlopen, file_path_to_url +import pandas.io.html from pandas.io.html import read_html from pandas._libs.parsers import ParserError @@ -931,3 +936,32 @@ def test_same_ordering(): dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) + + +class ErrorThread(threading.Thread): + def run(self): + try: + super(ErrorThread, self).run() + except Exception as e: + self.err = e + else: + self.err = None + + +@pytest.mark.slow +def test_importcheck_thread_safety(): + # see gh-16928 + + # force import check by reinitialising global vars in html.py + reload(pandas.io.html) + + filename = os.path.join(DATA_PATH, 'valid_markup.html') + helper_thread1 = ErrorThread(target=read_html, args=(filename,)) + helper_thread2 = ErrorThread(target=read_html, args=(filename,)) + + helper_thread1.start() + helper_thread2.start() + + while helper_thread1.is_alive() or helper_thread2.is_alive(): + pass + assert None is helper_thread1.err is helper_thread2.err