Skip to content

Commit

Permalink
Merge pull request #4352 from cpcloud/html-testing-assertions-fix
Browse files Browse the repository at this point in the history
TST/BUG: make sure read_html tests work on Python 2.6
  • Loading branch information
cpcloud committed Jul 26, 2013
2 parents 7a15bdd + 5c9aae2 commit a8f52f9
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 19 deletions.
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ pandas 0.13
(:issue:`4353`)
- Fixed an issue where ``DataFrame.sum`` was slower than ``DataFrame.mean``
for integer-valued frames (:issue:`4365`)
- ``read_html`` tests now work with Python 2.6; they no longer use ``unittest`` assertion methods that were added in Python 2.7 (:issue:`4351`)

pandas 0.12
===========
Expand Down
2 changes: 2 additions & 0 deletions doc/source/v0.13.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ Bug Fixes
- Fixed an issue where ``DataFrame.sum`` was slower than ``DataFrame.mean``
for integer-valued frames (:issue:`4365`)

- ``read_html`` tests now work with Python 2.6; they no longer use ``unittest`` assertion methods that were added in Python 2.7 (:issue:`4351`)

See the :ref:`full release notes
<release>` or issue tracker
on GitHub for a complete list.
38 changes: 19 additions & 19 deletions pandas/io/tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from pandas.util.testing import (assert_frame_equal, network,
get_data_path)

from pandas.util.testing import makeCustomDataframe as mkdf, rands
from pandas.util.testing import makeCustomDataframe as mkdf


def _have_module(module_name):
Expand Down Expand Up @@ -157,12 +157,12 @@ def test_spam(self):
def test_spam_no_match(self):
dfs = self.run_read_html(self.spam_data)
for df in dfs:
self.assertIsInstance(df, DataFrame)
self.assert_(isinstance(df, DataFrame))

def test_banklist_no_match(self):
dfs = self.run_read_html(self.banklist_data, attrs={'id': 'table'})
for df in dfs:
self.assertIsInstance(df, DataFrame)
self.assert_(isinstance(df, DataFrame))

def test_spam_header(self):
df = self.run_read_html(self.spam_data, '.*Water.*', header=0)
Expand Down Expand Up @@ -301,9 +301,9 @@ def test_file_url(self):
url = self.banklist_data
dfs = self.run_read_html('file://' + url, 'First',
attrs={'id': 'table'})
self.assertIsInstance(dfs, list)
self.assert_(isinstance(dfs, list))
for df in dfs:
self.assertIsInstance(df, DataFrame)
self.assert_(isinstance(df, DataFrame))

@slow
def test_invalid_table_attrs(self):
Expand All @@ -319,38 +319,38 @@ def _bank_data(self, *args, **kwargs):
@slow
def test_multiindex_header(self):
df = self._bank_data(header=[0, 1])[0]
self.assertIsInstance(df.columns, MultiIndex)
self.assert_(isinstance(df.columns, MultiIndex))

@slow
def test_multiindex_index(self):
df = self._bank_data(index_col=[0, 1])[0]
self.assertIsInstance(df.index, MultiIndex)
self.assert_(isinstance(df.index, MultiIndex))

@slow
def test_multiindex_header_index(self):
df = self._bank_data(header=[0, 1], index_col=[0, 1])[0]
self.assertIsInstance(df.columns, MultiIndex)
self.assertIsInstance(df.index, MultiIndex)
self.assert_(isinstance(df.columns, MultiIndex))
self.assert_(isinstance(df.index, MultiIndex))

@slow
def test_multiindex_header_skiprows(self):
df = self._bank_data(header=[0, 1], skiprows=1)[0]
self.assertIsInstance(df.columns, MultiIndex)
self.assert_(isinstance(df.columns, MultiIndex))

@slow
def test_multiindex_header_index_skiprows(self):
df = self._bank_data(header=[0, 1], index_col=[0, 1], skiprows=1)[0]
self.assertIsInstance(df.index, MultiIndex)
self.assert_(isinstance(df.index, MultiIndex))

@slow
def test_regex_idempotency(self):
url = self.banklist_data
dfs = self.run_read_html('file://' + url,
match=re.compile(re.compile('Florida')),
attrs={'id': 'table'})
self.assertIsInstance(dfs, list)
self.assert_(isinstance(dfs, list))
for df in dfs:
self.assertIsInstance(df, DataFrame)
self.assert_(isinstance(df, DataFrame))

def test_negative_skiprows_spam(self):
url = self.spam_data
Expand All @@ -367,15 +367,15 @@ def test_multiple_matches(self):
url = 'http://code.google.com/p/pythonxy/wiki/StandardPlugins'
dfs = self.run_read_html(url, match='Python',
attrs={'class': 'wikitable'})
self.assertGreater(len(dfs), 1)
self.assert_(len(dfs) > 1)

@network
def test_pythonxy_plugins_table(self):
url = 'http://code.google.com/p/pythonxy/wiki/StandardPlugins'
dfs = self.run_read_html(url, match='Python',
attrs={'class': 'wikitable'})
zz = [df.iloc[0, 0] for df in dfs]
self.assertListEqual(sorted(zz), sorted(['Python', 'SciTE']))
self.assertEqual(sorted(zz), sorted(['Python', 'SciTE']))

@slow
def test_banklist_header(self):
Expand All @@ -391,7 +391,7 @@ def try_remove_ws(x):
ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'),
converters={'Updated Date': Timestamp,
'Closing Date': Timestamp})
self.assertTupleEqual(df.shape, ground_truth.shape)
self.assertEqual(df.shape, ground_truth.shape)
old = ['First Vietnamese American BankIn Vietnamese',
'Westernbank Puerto RicoEn Espanol',
'R-G Premier Bank of Puerto RicoEn Espanol',
Expand Down Expand Up @@ -422,7 +422,7 @@ def test_gold_canyon(self):
self.assert_(gc in raw_text)
df = self.run_read_html(self.banklist_data, 'Gold Canyon',
attrs={'id': 'table'}, infer_types=False)[0]
self.assertIn(gc, df.to_string())
self.assert_(gc in df.to_string())


class TestReadHtmlLxml(TestCase):
Expand All @@ -449,8 +449,8 @@ def test_banklist_data_fail(self):
def test_works_on_valid_markup(self):
filename = os.path.join(DATA_PATH, 'valid_markup.html')
dfs = self.run_read_html(filename, index_col=0, flavor=['lxml'])
self.assertIsInstance(dfs, list)
self.assertIsInstance(dfs[0], DataFrame)
self.assert_(isinstance(dfs, list))
self.assert_(isinstance(dfs[0], DataFrame))

def setUp(self):
self.try_skip()
Expand Down

0 comments on commit a8f52f9

Please sign in to comment.