diff --git a/.travis.yml b/.travis.yml index ac889bcd8..48141f13a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -53,6 +53,23 @@ matrix: - mpich - libmpich-dev - libhdf5-mpich-dev + # test MPI with latest released version + - python: 3.7 + dist: xenial + env: + - MPI=1 + - CC=mpicc.mpich + - DEPENDS="numpy>=1.9.0 cython>=0.21 setuptools>=18.0 mpi4py>=1.3.1 cftime" + - NETCDF_VERSION=4.6.3 + - PNETCDF_VERSION=1.11.0 + - NETCDF_DIR=$HOME + - PATH=${NETCDF_DIR}/bin:${PATH} # pick up nc-config here + addons: + apt: + packages: + - mpich + - libmpich-dev + - libhdf5-mpich-dev # test with netcdf-c from github master - python: 3.7 dist: xenial @@ -86,6 +103,8 @@ script: - cd test - python run_all.py - | + echo "MPI = ${MPI}" + echo "PNETCDF_VERSION = ${PNETCDF_VERSION}" if [ $MPI -eq 1 ] ; then cd ../examples mpirun.mpich -np 4 python mpi_example.py @@ -94,6 +113,14 @@ script: exit 1 else echo "mpi test passed!" - exit 0 + fi + if [ -n "${PNETCDF_VERSION}" ] ; then + mpirun.mpich -np 4 python mpi_example.py NETCDF3_64BIT_DATA + if [ $? -ne 0 ] ; then + echo "PnetCDF mpi test failed!" + exit 1 + else + echo "PnetCDF mpi test passed!" + fi fi fi diff --git a/Changelog b/Changelog index 463addfa2..8f38449f8 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,8 @@ + current master +=============== + * added support for parallel IO in the classic netcdf-3 formats through the + pnetcdf library. + version 1.4.3.2 (tag v1.4.3.2) =============================== * include missing membuf.pyx file in release source tarball. 
diff --git a/ci/travis/build-parallel-netcdf.sh b/ci/travis/build-parallel-netcdf.sh index 4035351fc..a9c211a09 100755 --- a/ci/travis/build-parallel-netcdf.sh +++ b/ci/travis/build-parallel-netcdf.sh @@ -2,8 +2,19 @@ set -e -echo "Using downloaded netCDF version ${NETCDF_VERSION} with parallel capabilities enabled" pushd /tmp +if [ -n "${PNETCDF_VERSION}" ]; then + echo "Using downloaded PnetCDF version ${PNETCDF_VERSION}" + wget https://parallel-netcdf.github.io/Release/pnetcdf-${PNETCDF_VERSION}.tar.gz + tar -xzf pnetcdf-${PNETCDF_VERSION}.tar.gz + pushd pnetcdf-${PNETCDF_VERSION} + ./configure --prefix $NETCDF_DIR --enable-shared --disable-fortran --disable-cxx + NETCDF_EXTRA_CONFIG="--enable-pnetcdf" + make -j 2 + make install + popd +fi +echo "Using downloaded netCDF version ${NETCDF_VERSION} with parallel capabilities enabled" if [ ${NETCDF_VERSION} == "GITMASTER" ]; then git clone http://github.com/Unidata/netcdf-c netcdf-c pushd netcdf-c @@ -14,9 +25,10 @@ else pushd netcdf-c-${NETCDF_VERSION} fi # for Ubuntu xenial -export CPPFLAGS="-I/usr/include/hdf5/mpich" +export CPPFLAGS="-I/usr/include/hdf5/mpich -I${NETCDF_DIR}/include" +export LDFLAGS="-L${NETCDF_DIR}/lib" export LIBS="-lhdf5_mpich_hl -lhdf5_mpich -lm -lz" -./configure --prefix $NETCDF_DIR --enable-netcdf-4 --enable-shared --disable-dap --enable-parallel +./configure --prefix $NETCDF_DIR --enable-netcdf-4 --enable-shared --disable-dap --enable-parallel4 $NETCDF_EXTRA_CONFIG make -j 2 make install popd diff --git a/examples/mpi_example.py b/examples/mpi_example.py index 885984882..0bebfe675 100644 --- a/examples/mpi_example.py +++ b/examples/mpi_example.py @@ -1,10 +1,17 @@ # to run: mpirun -np 4 python mpi_example.py +import sys from mpi4py import MPI import numpy as np from netCDF4 import Dataset +if len(sys.argv) == 2: + format = sys.argv[1] +else: + format = 'NETCDF4_CLASSIC' rank = MPI.COMM_WORLD.rank # The process ID (integer 0-3 for 4-process run) +if rank == 0: + print('Creating file with 
format {}'.format(format)) nc = Dataset('parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD, - info=MPI.Info(),format='NETCDF4_CLASSIC') + info=MPI.Info(),format=format) # below should work also - MPI_COMM_WORLD and MPI_INFO_NULL will be used. #nc = Dataset('parallel_test.nc', 'w', parallel=True) d = nc.createDimension('dim',4) diff --git a/include/netCDF4.pxi b/include/netCDF4.pxi index 3b8cde18e..625752a68 100644 --- a/include/netCDF4.pxi +++ b/include/netCDF4.pxi @@ -51,6 +51,7 @@ cdef extern from "netcdf.h": NC_CLOBBER NC_NOCLOBBER # Don't destroy existing file on create NC_64BIT_OFFSET # Use large (64-bit) file offsets + NC_64BIT_DATA # Use cdf-5 format NC_NETCDF4 # Use netCDF-4/HDF5 format NC_CLASSIC_MODEL # Enforce strict netcdf-3 rules. # Use these 'mode' flags for both nc_create and nc_open. @@ -703,7 +704,7 @@ IF HAS_NC_CREATE_MEM: int flags int nc_close_memio(int ncid, NC_memio* info); -IF HAS_NC_PAR: +IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: cdef extern from "mpi-compat.h": pass cdef extern from "netcdf_par.h": ctypedef int MPI_Comm diff --git a/netCDF4/__init__.py b/netCDF4/__init__.py index e2660d13c..4bdbdde38 100644 --- a/netCDF4/__init__.py +++ b/netCDF4/__init__.py @@ -6,6 +6,7 @@ from ._netCDF4 import (__version__, __netcdf4libversion__, __hdf5libversion__, __has_rename_grp__, __has_nc_inq_path__, __has_nc_inq_format_extended__, __has_nc_open_mem__, - __has_nc_create_mem__,__has_cdf5_format__,__has_nc_par__) + __has_nc_create_mem__, __has_cdf5_format__, + __has_parallel4_support__, __has_pnetcdf_support__) __all__ =\ ['Dataset','Variable','Dimension','Group','MFDataset','MFTime','CompoundType','VLType','date2num','num2date','date2index','stringtochar','chartostring','stringtoarr','getlibversion','EnumType'] diff --git a/netCDF4/_netCDF4.pyx b/netCDF4/_netCDF4.pyx index bd8a83f1e..29563c290 100644 --- a/netCDF4/_netCDF4.pyx +++ b/netCDF4/_netCDF4.pyx @@ -63,9 +63,10 @@ Requires If you want [OPeNDAP](http://opendap.org) support, add 
`--enable-dap`. If you want HDF4 SD support, add `--enable-hdf4` and add the location of the HDF4 headers and library to `$CPPFLAGS` and `$LDFLAGS`. - - for MPI parallel IO support, MPI-enabled versions of the HDF5 and netcdf - libraries are required, as is the [mpi4py](http://mpi4py.scipy.org) python - module. + - for MPI parallel IO support, an MPI-enabled version of the netcdf library + is required, as is the [mpi4py](http://mpi4py.scipy.org) python module. + Parallel IO further depends on the existence of MPI-enabled HDF5 or the + [PnetCDF](https://parallel-netcdf.github.io/) library. Install @@ -918,13 +919,14 @@ specified names. ##
13) Parallel IO. -If MPI parallel enabled versions of netcdf and hdf5 are detected, and -[mpi4py](https://mpi4py.scipy.org) is installed, netcdf4-python will -be built with parallel IO capabilities enabled. Since parallel IO -uses features of HDF5, it can only be used with NETCDF4 or -NETCDF4_CLASSIC formatted files. To use parallel IO, -your program must be running in an MPI environment using -[mpi4py](https://mpi4py.scipy.org). +If MPI parallel enabled versions of netcdf and hdf5 or pnetcdf are detected, +and [mpi4py](https://mpi4py.scipy.org) is installed, netcdf4-python will +be built with parallel IO capabilities enabled. Parallel IO of NETCDF4 or +NETCDF4_CLASSIC formatted files is only available if the MPI parallel HDF5 +library is available. Parallel IO of classic netcdf-3 file formats is only +available if the [PnetCDF](https://parallel-netcdf.github.io/) library is +available. To use parallel IO, your program must be running in an MPI +environment using [mpi4py](https://mpi4py.scipy.org). :::python >>> from mpi4py import MPI @@ -971,9 +973,12 @@ participate in doing IO. To toggle back and forth between the two types of IO, use the `netCDF4.Variable.set_collective` `netCDF4.Variable`method. All metadata operations (such as creation of groups, types, variables, dimensions, or attributes) -are collective. There are a couple of important limitatons of parallel IO: +are collective. There are a couple of important limitations of parallel IO: - - only works with NETCDF4 or NETCDF4_CLASSIC formatted files. + - parallel IO for NETCDF4 or NETCDF4_CLASSIC formatted files is only available + if the netcdf library was compiled with MPI enabled HDF5. + - parallel IO for all classic netcdf-3 file formats is only available if the + netcdf library was compiled with PnetCDF. - If a variable has an unlimited dimension, appending data must be done in collective mode. If the write is done in independent mode, the operation will fail with a a generic "HDF Error". 
@@ -1202,7 +1207,7 @@ import_array() include "constants.pyx" include "membuf.pyx" include "netCDF4.pxi" -IF HAS_NC_PAR: +IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: cimport mpi4py.MPI as MPI from mpi4py.libmpi cimport MPI_Comm, MPI_Info, MPI_Comm_dup, MPI_Info_dup, \ MPI_Comm_free, MPI_Info_free, MPI_INFO_NULL,\ @@ -1240,7 +1245,8 @@ __has_nc_inq_format_extended__ = HAS_NC_INQ_FORMAT_EXTENDED __has_cdf5_format__ = HAS_CDF5_FORMAT __has_nc_open_mem__ = HAS_NC_OPEN_MEM __has_nc_create_mem__ = HAS_NC_CREATE_MEM -__has_nc_par__ = HAS_NC_PAR +__has_parallel4_support__ = HAS_PARALLEL4_SUPPORT +__has_pnetcdf_support__ = HAS_PNETCDF_SUPPORT _needsworkaround_issue485 = __netcdf4libversion__ < "4.4.0" or \ (__netcdf4libversion__.startswith("4.4.0") and \ "-development" in __netcdf4libversion__) @@ -1282,20 +1288,29 @@ _intnptonctype = {'i1' : NC_BYTE, _format_dict = {'NETCDF3_CLASSIC' : NC_FORMAT_CLASSIC, 'NETCDF4_CLASSIC' : NC_FORMAT_NETCDF4_CLASSIC, 'NETCDF4' : NC_FORMAT_NETCDF4} +# create dictionary mapping string identifiers to netcdf create format codes +_cmode_dict = {'NETCDF3_CLASSIC' : NC_CLASSIC_MODEL, + 'NETCDF4_CLASSIC' : NC_CLASSIC_MODEL | NC_NETCDF4, + 'NETCDF4' : NC_NETCDF4} IF HAS_CDF5_FORMAT: # NETCDF3_64BIT deprecated, saved for compatibility. # use NETCDF3_64BIT_OFFSET instead. 
_format_dict['NETCDF3_64BIT_OFFSET'] = NC_FORMAT_64BIT_OFFSET _format_dict['NETCDF3_64BIT_DATA'] = NC_FORMAT_64BIT_DATA + _cmode_dict['NETCDF3_64BIT_OFFSET'] = NC_64BIT_OFFSET + _cmode_dict['NETCDF3_64BIT_DATA'] = NC_64BIT_DATA ELSE: _format_dict['NETCDF3_64BIT'] = NC_FORMAT_64BIT + _cmode_dict['NETCDF3_64BIT'] = NC_64BIT_OFFSET # invert dictionary mapping _reverse_format_dict = dict((v, k) for k, v in _format_dict.iteritems()) # add duplicate entry (NETCDF3_64BIT == NETCDF3_64BIT_OFFSET) IF HAS_CDF5_FORMAT: _format_dict['NETCDF3_64BIT'] = NC_FORMAT_64BIT_OFFSET + _cmode_dict['NETCDF3_64BIT'] = NC_64BIT_OFFSET ELSE: _format_dict['NETCDF3_64BIT_OFFSET'] = NC_FORMAT_64BIT + _cmode_dict['NETCDF3_64BIT_OFFSET'] = NC_64BIT_OFFSET # default fill_value to numpy datatype mapping. default_fillvals = {#'S1':NC_FILL_CHAR, @@ -2084,7 +2099,7 @@ strings. cdef char *path cdef char namstring[NC_MAX_NAME+1] cdef int cmode - IF HAS_NC_PAR: + IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: cdef MPI_Comm mpicomm cdef MPI_Info mpiinfo @@ -2107,12 +2122,20 @@ strings. raise ValueError(msg) if parallel: - IF HAS_NC_PAR != 1: + IF HAS_PARALLEL4_SUPPORT != 1 and HAS_PNETCDF_SUPPORT != 1: msg='parallel mode requires MPI enabled netcdf-c' raise ValueError(msg) ELSE: - if format not in ['NETCDF4','NETCDF4_CLASSIC']: - msg='parallel mode only works with format=NETCDF4 or NETCDF4_CLASSIC' + parallel_formats = [] + IF HAS_PARALLEL4_SUPPORT: + parallel_formats += ['NETCDF4','NETCDF4_CLASSIC'] + IF HAS_PNETCDF_SUPPORT: + parallel_formats += ['NETCDF3_CLASSIC', + 'NETCDF3_64BIT_OFFSET', + 'NETCDF3_64BIT_DATA', + 'NETCDF3_64BIT'] + if format not in parallel_formats: + msg='parallel mode only works with the following formats: ' + ' '.join(parallel_formats) raise ValueError(msg) if comm is not None: mpicomm = comm.ob_mpi @@ -2122,9 +2145,7 @@ strings. 
mpiinfo = info.ob_mpi else: mpiinfo = MPI_INFO_NULL - cmode = NC_MPIIO | NC_NETCDF4 - if format == 'NETCDF4_CLASSIC': - cmode = cmode | NC_CLASSIC_MODEL + cmode = NC_MPIIO | _cmode_dict[format] self._inmemory = False if mode == 'w': @@ -2144,7 +2165,7 @@ strings. else: if clobber: if parallel: - IF HAS_NC_PAR: + IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: ierr = nc_create_par(path, NC_CLOBBER | cmode, \ mpicomm, mpiinfo, &grpid) ELSE: @@ -2159,7 +2180,7 @@ strings. ierr = nc_create(path, NC_CLOBBER, &grpid) else: if parallel: - IF HAS_NC_PAR: + IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: ierr = nc_create_par(path, NC_NOCLOBBER | cmode, \ mpicomm, mpiinfo, &grpid) ELSE: @@ -2194,7 +2215,7 @@ strings. version 4.4.1 or higher of the netcdf C lib, and rebuild netcdf4-python.""" raise ValueError(msg) elif parallel: - IF HAS_NC_PAR: + IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: ierr = nc_open_par(path, NC_NOWRITE | NC_MPIIO, \ mpicomm, mpiinfo, &grpid) ELSE: @@ -2205,7 +2226,7 @@ strings. ierr = nc_open(path, NC_NOWRITE, &grpid) elif mode == 'r+' or mode == 'a': if parallel: - IF HAS_NC_PAR: + IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: ierr = nc_open_par(path, NC_WRITE | NC_MPIIO, \ mpicomm, mpiinfo, &grpid) ELSE: @@ -2217,7 +2238,7 @@ strings. elif mode == 'as' or mode == 'r+s': if parallel: # NC_SHARE ignored - IF HAS_NC_PAR: + IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: ierr = nc_open_par(path, NC_WRITE | NC_MPIIO, \ mpicomm, mpiinfo, &grpid) ELSE: @@ -2231,7 +2252,7 @@ strings. if clobber: if parallel: # NC_SHARE ignored - IF HAS_NC_PAR: + IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: ierr = nc_create_par(path, NC_CLOBBER | cmode, \ mpicomm, mpiinfo, &grpid) ELSE: @@ -2246,7 +2267,7 @@ strings. else: if parallel: # NC_SHARE ignored - IF HAS_NC_PAR: + IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: ierr = nc_create_par(path, NC_NOCLOBBER | cmode, \ mpicomm, mpiinfo, &grpid) ELSE: @@ -5345,7 +5366,7 @@ NC_CHAR). 
turn on or off collective parallel IO access. Ignored if file is not open for parallel access. """ - IF HAS_NC_PAR: + IF HAS_PARALLEL4_SUPPORT or HAS_PNETCDF_SUPPORT: # set collective MPI IO mode on or off if value: ierr = nc_var_par_access(self._grpid, self._varid, diff --git a/setup.py b/setup.py index cf5013d4e..8f1ade563 100644 --- a/setup.py +++ b/setup.py @@ -56,7 +56,8 @@ def check_api(inc_dirs): has_cdf5_format = False has_nc_open_mem = False has_nc_create_mem = False - has_nc_par = False + has_parallel4_support = False + has_pnetcdf_support = False for d in inc_dirs: try: @@ -65,7 +66,6 @@ def check_api(inc_dirs): continue has_nc_open_mem = os.path.exists(os.path.join(d, 'netcdf_mem.h')) - has_nc_par = os.path.exists(os.path.join(d, 'netcdf_par.h')) for line in f: if line.startswith('nc_rename_grp'): @@ -91,10 +91,15 @@ def check_api(inc_dirs): for line in open(ncmetapath): if line.startswith('#define NC_HAS_CDF5'): has_cdf5_format = bool(int(line.split()[2])) + elif line.startswith('#define NC_HAS_PARALLEL4'): + has_parallel4_support = bool(int(line.split()[2])) + elif line.startswith('#define NC_HAS_PNETCDF'): + has_pnetcdf_support = bool(int(line.split()[2])) break return has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \ - has_cdf5_format, has_nc_open_mem, has_nc_create_mem, has_nc_par + has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \ + has_parallel4_support, has_pnetcdf_support def getnetcdfvers(libdirs): @@ -488,7 +493,8 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): os.remove(netcdf4_src_c) # this determines whether renameGroup and filepath methods will work. has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \ - has_cdf5_format, has_nc_open_mem, has_nc_create_mem, has_nc_par = check_api(inc_dirs) + has_cdf5_format, has_nc_open_mem, has_nc_create_mem, \ + has_parallel4_support, has_pnetcdf_support = check_api(inc_dirs) # for netcdf 4.4.x CDF5 format is always enabled. 
if netcdf_lib_version is not None and\ (netcdf_lib_version > "4.4" and netcdf_lib_version < "4.5"): @@ -498,7 +504,8 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): try: import mpi4py except ImportError: - has_nc_par = False + has_parallel4_support = False + has_pnetcdf_support = False f = open(osp.join('include', 'constants.pyx'), 'w') if has_rename_grp: @@ -544,16 +551,23 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): sys.stdout.write('netcdf lib does not have cdf-5 format capability\n') f.write('DEF HAS_CDF5_FORMAT = 0\n') - if has_nc_par: + if has_parallel4_support: sys.stdout.write('netcdf lib has netcdf4 parallel functions\n') - f.write('DEF HAS_NC_PAR = 1\n') + f.write('DEF HAS_PARALLEL4_SUPPORT = 1\n') else: sys.stdout.write('netcdf lib does not have netcdf4 parallel functions\n') - f.write('DEF HAS_NC_PAR = 0\n') + f.write('DEF HAS_PARALLEL4_SUPPORT = 0\n') + + if has_pnetcdf_support: + sys.stdout.write('netcdf lib has pnetcdf parallel functions\n') + f.write('DEF HAS_PNETCDF_SUPPORT = 1\n') + else: + sys.stdout.write('netcdf lib does not have pnetcdf parallel functions\n') + f.write('DEF HAS_PNETCDF_SUPPORT = 0\n') f.close() - if has_nc_par: + if has_parallel4_support or has_pnetcdf_support: inc_dirs.append(mpi4py.get_include()) # mpi_incdir should not be needed if using nc-config # (should be included in nc-config --cflags) diff --git a/test/run_all.py b/test/run_all.py index 2388d5fde..c229039c9 100755 --- a/test/run_all.py +++ b/test/run_all.py @@ -1,7 +1,7 @@ import glob, os, sys, unittest, struct from netCDF4 import getlibversion,__hdf5libversion__,__netcdf4libversion__,__version__ -from netCDF4 import __has_cdf5_format__, __has_nc_inq_path__, __has_nc_par__,\ - __has_nc_create_mem__ +from netCDF4 import __has_cdf5_format__, __has_nc_inq_path__, __has_nc_create_mem__, \ + __has_parallel4_support__, __has_pnetcdf_support__ # can also just run # python -m unittest discover . 
'tst*py' @@ -16,7 +16,7 @@ else: test_files.remove('tst_unicode3.py') sys.stdout.write('not running tst_unicode3.py ...\n') -if __netcdf4libversion__ < '4.2.1' or __has_nc_par__: +if __netcdf4libversion__ < '4.2.1' or __has_parallel4_support__ or __has_pnetcdf_support__: test_files.remove('tst_diskless.py') sys.stdout.write('not running tst_diskless.py ...\n') if not __has_nc_inq_path__: