From d57c9575ba4e8afbfa56e1dbc5eb542a7896a962 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Wed, 14 Feb 2018 15:43:27 -0700 Subject: [PATCH] handle pnetcdf/netcdf4p errors better --- src/clib/pioc_support.c | 53 ++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/src/clib/pioc_support.c b/src/clib/pioc_support.c index fcac24d122a..6e973aa71bb 100644 --- a/src/clib/pioc_support.c +++ b/src/clib/pioc_support.c @@ -410,10 +410,9 @@ int check_netcdf2(iosystem_desc_t *ios, file_desc_t *file, int status, /* User must provide this. */ pioassert(fname, "code file name must be provided", __FILE__, __LINE__); - /* No harm, no foul. */ - if (status == PIO_NOERR) - return PIO_NOERR; - + if (file && file->iosystem->ioproc && + (file->iotype == PIO_IOTYPE_PNETCDF || file->iotype == PIO_IOTYPE_NETCDF4P)) + MPI_Reduce(MPI_IN_PLACE, &status, 1, MPI_INT, MPI_MIN, 0, file->iosystem->io_comm); LOG((1, "check_netcdf2 status = %d fname = %s line = %d", status, fname, line)); /* Pick an error handler. */ @@ -1999,7 +1998,7 @@ int inq_file_metadata(file_desc_t *file, int ncid, int iotype, int *nvars, int * if (nunlimdims) { if (!(unlimdimids = malloc(nunlimdims * sizeof(int)))) - return pio_err(NULL, file, PIO_ENOMEM, __FILE__, __LINE__); + return pio_err(NULL, file, PIO_ENOMEM, __FILE__, __LINE__); if (iotype == PIO_IOTYPE_PNETCDF || iotype == PIO_IOTYPE_NETCDF) { unlimdimids[0] = unlimdimid; @@ -2025,19 +2024,19 @@ int inq_file_metadata(file_desc_t *file, int ncid, int iotype, int *nvars, int * { #ifdef _PNETCDF PIO_Offset type_size; - + if ((ret = ncmpi_inq_var(ncid, v, NULL, &my_type, &var_ndims, NULL, NULL))) return pio_err(NULL, file, ret, __FILE__, __LINE__); (*pio_type)[v] = (int)my_type; if ((ret = pioc_pnetcdf_inq_type(ncid, (*pio_type)[v], NULL, &type_size))) return check_netcdf(file, ret, __FILE__, __LINE__); (*pio_type_size)[v] = type_size; -#endif /* _PNETCDF */ +#endif /* _PNETCDF */ } else { size_t type_size; - + if ((ret = nc_inq_var(ncid, v, NULL, &my_type, &var_ndims, NULL, NULL))) return pio_err(NULL, file, ret, __FILE__, __LINE__); (*pio_type)[v] = (int)my_type; @@ -2063,14 +2062,14 @@ int inq_file_metadata(file_desc_t *file, int ncid, int iotype, int *nvars, int * #ifdef _PNETCDF if ((ret = ncmpi_inq_vardimid(ncid, v, var_dimids))) return pio_err(NULL, file, ret, __FILE__, __LINE__); -#endif /* _PNETCDF */ +#endif /* _PNETCDF */ } else { if ((ret = nc_inq_vardimid(ncid, v, var_dimids))) return pio_err(NULL, file, ret, __FILE__, __LINE__); } - + /* Check against each variable dimid agains each unlimited * dimid. */ for (int d = 0; d < var_ndims; d++) @@ -2094,19 +2093,19 @@ int inq_file_metadata(file_desc_t *file, int ncid, int iotype, int *nvars, int * (*rec_var)[v] = 1; else return pio_err(NULL, file, PIO_EINVAL, __FILE__, __LINE__); - + } else (*rec_var)[v] = 0; - + } } } /* next var */ - + /* Free resources. */ if (nunlimdims) free(unlimdimids); - + return PIO_NOERR; } @@ -2303,11 +2302,10 @@ int PIOc_openfile_retry(int iosysid, int *ncidp, int *iotype, const char *filena /* open netcdf file serially on main task */ if (ios->io_rank == 0) { - if ((ierr = nc_open(filename, mode, &file->fh))) - return pio_err(ios, file, ierr, __FILE__, __LINE__); - if ((ierr = inq_file_metadata(file, file->fh, PIO_IOTYPE_NETCDF, &nvars, &rec_var, &pio_type, - &pio_type_size, &mpi_type, &mpi_type_size))) - return pio_err(ios, file, ierr, __FILE__, __LINE__); + ierr = nc_open(filename, mode, &file->fh); + if (ierr == PIO_NOERR) + ierr = inq_file_metadata(file, file->fh, PIO_IOTYPE_NETCDF, &nvars, &rec_var, &pio_type, + &pio_type_size, &mpi_type, &mpi_type_size); } else file->do_io = 0; @@ -2318,8 +2316,9 @@ int PIOc_openfile_retry(int iosysid, int *ncidp, int *iotype, const char *filena } /* Broadcast and check the return code. */ - LOG((2, "Bcasting error code ierr %d ios->ioroot %d ios->my_comm %d", - ierr, ios->ioroot, ios->my_comm)); + if (ios->ioroot == ios->union_rank) + LOG((2, "Bcasting error code ierr %d ios->ioroot %d ios->my_comm %d", + ierr, ios->ioroot, ios->my_comm)); if ((mpierr = MPI_Bcast(&ierr, 1, MPI_INT, ios->ioroot, ios->my_comm))) return check_mpi(file, mpierr, __FILE__, __LINE__); LOG((2, "Bcast openfile_retry error code ierr = %d", ierr)); @@ -2342,7 +2341,7 @@ int PIOc_openfile_retry(int iosysid, int *ncidp, int *iotype, const char *filena if ((mpierr = MPI_Bcast(&pio_next_ncid, 1, MPI_INT, ios->ioroot, ios->my_comm))) return check_mpi(file, mpierr, __FILE__, __LINE__); } - + if ((mpierr = MPI_Bcast(&nvars, 1, MPI_INT, ios->ioroot, ios->my_comm))) return check_mpi(file, mpierr, __FILE__, __LINE__); @@ -2350,15 +2349,15 @@ int PIOc_openfile_retry(int iosysid, int *ncidp, int *iotype, const char *filena if (nvars && !rec_var) { if (!(rec_var = malloc(nvars * sizeof(int)))) - return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); + return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); if (!(pio_type = malloc(nvars * sizeof(int)))) - return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); + return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); if (!(pio_type_size = malloc(nvars * sizeof(int)))) - return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); + return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); if (!(mpi_type = malloc(nvars * sizeof(MPI_Datatype)))) - return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); + return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); if (!(mpi_type_size = malloc(nvars * sizeof(int)))) - return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); + return pio_err(ios, file, PIO_ENOMEM, __FILE__, __LINE__); } if (nvars) {