Final minor bug fixes. (#290)

* Final minor bug fixes. * More small fixes. - draw_table now just uses the first 20 rows to figure out column widths. - Handle non-datetime objects in rel_time * Fixed yaml_config. * Addressed comments.
hpc · Jul 16, 2020 · e8e20dd · e8e20dd
1 parent 8c332eb
commit e8e20dd
Show file tree

Hide file tree

Showing 6 changed files with 83 additions and 22 deletions.
diff --git a/docs/install.rst b/docs/install.rst
@@ -6,7 +6,7 @@ Installing Pavilion
 ===================
 
 Installing Pavilion is mostly a matter of placing its source somewhere,
-providing it's (few) dependencies, and creating a pavilion.yaml config
+providing its (few) dependencies, and creating a pavilion.yaml config
 file.
 
 .. contents::
@@ -16,8 +16,9 @@ Requirements
 
 Pavilion has very few dependencies and requirements:
 
-- Python 3.4 or newer
-- A writeable space on a filesystem shared across all (tested) hosts in each cluster.
+- Python 3.5 or newer
+- A writeable space on a filesystem shared across all (tested) hosts in
+  each cluster (assuming you're scheduling jobs across a cluster).
 
   - The path to this directory must be consistent across all cluster hosts.
   - It must support atomic file creation and appends of < 4kb.
@@ -30,7 +31,7 @@ Filesystems
 
 Pavilion works by recursively running itself in different modes at
 different points in the testing process. This means certain paths, like
-the Pavilion **root directory**, **working directory**, and used
+the Pavilion **source directory**, **working directory**, and used
 **config directories** must have paths that are consistent across the
 nodes and front-ends of any given system.
 
@@ -45,6 +46,16 @@ to ``~/.pavilion/``).
   your cluster NFS partitions. Lustre filesystems are not recommended, mostly
   due to the type of load Pavilion presents to these.
 
+Testing Filesystems
+~~~~~~~~~~~~~~~~~~~
+
+If you're unsure if your shared filesystem is reliable, there's a test for
+that in ``test/utils``.
+
+.. code-block:: bash
+
+    $ python3 lock_test.py --help
+
 Result Log
 ~~~~~~~~~~
 
@@ -84,6 +95,21 @@ starts with a git pull of the latest release of Pavilion.
 
 You can also simply download and extract the source.
 
+Releases
+~~~~~~~~
+
+You should probably pick the latest Pavilion *release* when installing
+Pavilion for a couple of reasons.
+
+ 1) While we try to maintain backwards compatibility as much as possible,
+    the reality is that every release contains several major compatibility
+    breaks both for test configurations and plugins. These are documented
+    per-release in the `_static/RELEASE.txt`_ file.
+ 2) We run a large bevy of unit tests against every change in Pavilion, but
+    each release is used in production before it is actually tagged. This
+    often reveals bugs, regressions, and practical usage issues. We fix those
+    issues, then tag the release. Bugfix releases are provided as needed.
+
 Dependencies
 ------------
 
@@ -95,10 +121,10 @@ supported and tests versions for each are recorded in ``requirements.txt``.
 
 -  `yaml\_config <https://github.com/lanl/yaml_config>`__ **(required)**
    - Used to define the test and pavilion configurations.
--  `**yc\_yaml** <https://github.com/pflarr/yc_yaml>`__ **(required)** - A
-   modified pyyaml used by yaml\_config.
--  `**yapsy** <http://yapsy.sourceforge.net/>`__ **(required)** - The basis
+-  `yapsy <http://yapsy.sourceforge.net/>`__ **(required)** - The basis
    for Pavilion's plugin architecture.
+-  `lark <https://github.com/lark-parser/lark>`__ **(required)** - Used for
+   Pavilion string value and expression parsing.
 -  `requests <https://pypi.org/project/requests/2.7.0/>`__ - Used for
    automatic downloads of test source files. This feature is disabled in
    the absence of this library, and tests that use it will fail with an

diff --git a/lib/pavilion/output.py b/lib/pavilion/output.py
@@ -707,17 +707,20 @@ def dt_auto_widths(rows, table_width, min_widths, max_widths):
     the column that will benefit the most from a single character of width
     increase. In case of a tie, two characters of width are considered, and
     so on. Remaining extra space is distributed amongst the final tied
-    columns."""
+    columns. To limit how long this takes, this makes a best guess using  
+    the first 20 rows."""
 
     fields = list(min_widths.keys())
 
     extra_spaces = table_width - sum(min_widths.values())
 
     final_widths = min_widths.copy()
 
+    # Limit to just the first 20 rows for speed.
+    rows = rows[:20]
+
     def calc_wraps(fld_, width_):
-        """Calculate the wraps for a given field at the given width across
-        all rows."""
+        """Calculate the wraps for a given field at the given width."""
         return sum([len(row_[fld_].wrap(width=width_))
                     for row_ in rows])
 

diff --git a/lib/pavilion/plugins/commands/status.py b/lib/pavilion/plugins/commands/status.py
@@ -83,7 +83,7 @@ def get_all_tests(pav_cfg, args):
                 'test_id': test_id,
                 'name':    "",
                 'state':   STATES.UNKNOWN,
-                'time':    "",
+                'time':    None,
                 'note':    "Test not found: {}".format(err)
             })
 
@@ -162,7 +162,7 @@ def get_statuses(pav_cfg, args, errfile):
                 'test_id': test_id,
                 'name':    "",
                 'state':   STATES.UNKNOWN,
-                'time':    "",
+                'time':    None,
                 'note':    "Error loading test: {}".format(err),
             })
 

diff --git a/lib/pavilion/plugins/commands/wait.py b/lib/pavilion/plugins/commands/wait.py
@@ -35,7 +35,7 @@ def _setup_arguments(self, parser):
         )
         parser.add_argument(
             '-t', '--timeout', action='store',
-            help='Maximum time to wait for results in seconds. Default is to'
+            help='Maximum time to wait for results in seconds. Default is to '
                  'wait indefinitely.'
         )
         parser.add_argument(

diff --git a/lib/pavilion/utils.py b/lib/pavilion/utils.py
@@ -130,14 +130,16 @@ def get_login():
     """Get the current user's login, either through os.getlogin or
     the environment, or the id command."""
 
+    # We've found this to be generally more reliable in sudo situations
+    # than getlogin.
+    if 'USER' in os.environ:
+        return os.environ['USER']
+
     try:
         return os.getlogin()
     except OSError:
         pass
 
-    if 'USER' in os.environ:
-        return os.environ['USER']
-
     try:
         name = subprocess.check_output(['id', '-un'],
                                        stderr=subprocess.DEVNULL)

diff --git a/test/utils/lock_test.py b/test/utils/lock_test.py
@@ -1,12 +1,40 @@
-"""Run this simultaniously on multiple hosts that share an NFS filesystem to test cross-system NFS
-locking. The tests do a synchronized start at the top of the minute according to the system clock, so 
-make sure the system clocks are close (within .5 seconds will do). Any locking errors will be
-printed."""
+"""
+Multi-host lockfile test.
+
+Usage: python3 lock_test.py <lockfile_dir>
+
+Run this simultaneously on multiple hosts that share an NFS filesystem
+to test cross-system NFS locking. The tests do a synchronized start at the top 
+of the minute according to the system clock, so make sure the system clocks are close 
+(within .5 seconds will do). Any locking errors will be
+printed.
+
+The shared lockfile (test.lockfile) is placed in <lockfile_dir>.
+
+The test should complete on all systems without errors.
+
+"""
 
-from pavilion import lockfile
 from pathlib import Path
+import sys
+libdir = (Path(__file__).resolve().parents[2]/'lib').as_posix()
+sys.path.append(libdir)
+
+from pavilion import lockfile
 import time
 
+lock_dir = None
+if len(sys.argv) == 2:
+    try:
+        lock_dir = Path(sys.argv[1])
+    except:
+        pass
+
+if ('--help' in sys.argv or '-h' in sys.argv
+        or lock_dir is None or not lock_dir.exists()):
+    print(__doc__)
+    sys.exit(1)
+
 acquires = 500
 acquired = 0
 
@@ -20,12 +48,14 @@
 
 print('starting', time.time(), flush=True)
 
+lock_path = lock_dir/'test.lockfile'
+
 # Acquire a bunch of locks to give plenty of chances for things to break.
 # More locking attempts also mean more time for runs on multiple systems 
 # to overlap.
 while acquired < acquires:
     start = time.time()
-    with lockfile.LockFile('/usr/projects/hpctest/.locktest'):
+    with lockfile.LockFile(lock_path):
         acquire_times.append(time.time() - start)
         print(".", end="", flush=True)
         acquired += 1