-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
EHN: Improved thread safety for read_html() GH16928 #16930
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,13 +3,17 @@ | |
import glob | ||
import os | ||
import re | ||
import threading | ||
import warnings | ||
|
||
|
||
# imports needed for Python 3.x but will fail under Python 2.x | ||
try: | ||
from importlib import import_module | ||
from importlib import import_module, reload | ||
except ImportError: | ||
import_module = __import__ | ||
|
||
|
||
from distutils.version import LooseVersion | ||
|
||
import pytest | ||
|
@@ -22,6 +26,7 @@ | |
from pandas.compat import (map, zip, StringIO, string_types, BytesIO, | ||
is_platform_windows, PY3) | ||
from pandas.io.common import URLError, urlopen, file_path_to_url | ||
import pandas.io.html | ||
from pandas.io.html import read_html | ||
from pandas._libs.parsers import ParserError | ||
|
||
|
@@ -931,3 +936,32 @@ def test_same_ordering(): | |
dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) | ||
dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) | ||
assert_framelist_equal(dfs_lxml, dfs_bs4) | ||
|
||
|
||
class ErrorThread(threading.Thread): | ||
def run(self): | ||
try: | ||
super(ErrorThread, self).run() | ||
except Exception as e: | ||
self.err = e | ||
else: | ||
self.err = None | ||
|
||
|
||
@pytest.mark.slow | ||
def test_importcheck_thread_safety(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you reference issue number below the test function definition? Something like "see gh-16928" will suffice. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you can only test under py3 then use a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've just tested it with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so if this works, then remove the comment above |
||
# see gh-16928 | ||
|
||
# force import check by reinitalising global vars in html.py | ||
reload(pandas.io.html) | ||
|
||
filename = os.path.join(DATA_PATH, 'valid_markup.html') | ||
helper_thread1 = ErrorThread(target=read_html, args=(filename,)) | ||
helper_thread2 = ErrorThread(target=read_html, args=(filename,)) | ||
|
||
helper_thread1.start() | ||
helper_thread2.start() | ||
|
||
while helper_thread1.is_alive() or helper_thread2.is_alive(): | ||
pass | ||
assert None is helper_thread1.err is helper_thread2.err |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not needed here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you elaborate?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@3553x : He means to just remove the line. I thought that line would be useful for clarity?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if this is not supported in py2, then you should either conditionally import it, or set
reload=None
and check it (and skip in a test).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
reload
is a built-in in Python2. There is no need to import it unless you are using Python3.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok