
Added convenience method for saving DataArray to netCDF file #990


Status: Merged (10 commits, Sep 6, 2016)
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -405,7 +405,9 @@ DataArray methods
.. autosummary::
:toctree: generated/

open_dataarray
DataArray.to_dataset
DataArray.to_netcdf
DataArray.to_pandas
DataArray.to_series
DataArray.to_dataframe
8 changes: 8 additions & 0 deletions doc/io.rst
@@ -117,6 +117,14 @@ We can load netCDF files to create a new Dataset using
ds_disk = xr.open_dataset('saved_on_disk.nc')
ds_disk

Similarly, a DataArray can be saved to disk using the
:py:meth:`DataArray.to_netcdf <xarray.DataArray.to_netcdf>` method, and loaded
from disk using the :py:func:`~xarray.open_dataarray` function. As netCDF files
correspond to :py:class:`~xarray.Dataset` objects, these functions internally
convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back
when loading, ensuring that the ``DataArray`` that is loaded is always exactly
the same as the one that was saved.
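
For example, a minimal roundtrip looks like this (an illustrative sketch; the
file name is arbitrary):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(4), name='foo')
    da.to_netcdf('saved_da.nc')                   # converted to a Dataset internally
    loaded_da = xr.open_dataarray('saved_da.nc')  # converted back on load
    assert loaded_da.identical(da)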

A dataset can also be loaded or written to a specific group within a netCDF
file. To load from a group, pass a ``group`` keyword argument to the
``open_dataset`` function. The group can be specified as a path-like
7 changes: 7 additions & 0 deletions doc/whats-new.rst
@@ -43,6 +43,13 @@ Enhancements
error messages if they are invalid. (:issue:`911`).
By `Robin Wilson <https://github.com/robintw>`_.

- Added ability to save ``DataArray`` objects directly to netCDF files using
:py:meth:`~xarray.DataArray.to_netcdf`, and to load directly from netCDF files
using :py:func:`~xarray.open_dataarray` (:issue:`915`). These remove the need
to convert a ``DataArray`` to a ``Dataset`` before saving as a netCDF file,
and handle names to ensure a perfect 'roundtrip' capability.
By `Robin Wilson <https://github.com/robintw>`_.

Bug fixes
~~~~~~~~~

3 changes: 2 additions & 1 deletion xarray/__init__.py
@@ -8,7 +8,8 @@
from .core.merge import merge, MergeError
from .core.options import set_options

from .backends.api import open_dataset, open_mfdataset, save_mfdataset
from .backends.api import (open_dataset, open_dataarray, open_mfdataset,
save_mfdataset)
from .conventions import decode_cf

try:
103 changes: 103 additions & 0 deletions xarray/backends/api.py
@@ -15,6 +15,8 @@
from ..core.utils import close_on_error, is_remote_uri
from ..core.pycompat import basestring

DATAARRAY_NAME = '__xarray_dataarray_name__'
DATAARRAY_VARIABLE = '__xarray_dataarray_variable__'

def _get_default_engine(path, allow_remote=False):
if allow_remote and is_remote_uri(path): # pragma: no cover
@@ -267,6 +269,107 @@ def maybe_decode_store(store, lock=False):
return maybe_decode_store(store)


def open_dataarray(filename_or_obj, group=None, decode_cf=True,
mask_and_scale=True, decode_times=True,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, drop_variables=None):
"""
Open a DataArray from a netCDF file containing a single data variable.

This is designed to read netCDF files with only one data variable. If
multiple variables are present then a ValueError is raised.

Parameters
----------
filename_or_obj : str, file or xarray.backends.*DataStore
Strings are interpreted as a path to a netCDF file or an OpenDAP URL
and opened with python-netCDF4, unless the filename ends with .gz, in
which case the file is gunzipped and opened with scipy.io.netcdf (only
netCDF3 supported). File-like objects are opened with scipy.io.netcdf
(only netCDF3 supported).
group : str, optional
Path to the netCDF4 group in the given file to open (only works for
netCDF4 files).
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
decode_coords : bool, optional
If True, decode the 'coordinates' attribute to identify coordinates in
the resulting dataset.
engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio'}, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
'netcdf4'.
chunks : int or dict, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. This is an experimental feature; see the documentation for more
details.
lock : False, True or threading.Lock, optional
If chunks is provided, this argument is passed on to
:py:func:`dask.array.from_array`. By default, a per-variable lock is
used when reading data from netCDF files with the netcdf4 and h5netcdf
engines to avoid issues with concurrent access when using dask's
multithreaded backend.
drop_variables : string or iterable, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.

Notes
-----
This is designed to be fully compatible with `DataArray.to_netcdf`. Saving
using `DataArray.to_netcdf` and then loading with this function will
produce an identical result.

All parameters are passed directly to `xarray.open_dataset`. See that
documentation for further details.

See also
--------
open_dataset
"""
dataset = open_dataset(filename_or_obj, group, decode_cf,
mask_and_scale, decode_times,
concat_characters, decode_coords, engine,
chunks, lock, drop_variables)

if len(dataset.data_vars) != 1:
raise ValueError('Given file dataset contains more than one data '
'variable. Please read with xarray.open_dataset and '
'then select the variable you want.')
else:
data_array, = dataset.data_vars.values()

data_array._file_obj = dataset._file_obj

# Reset names if they were changed during saving
# to ensure that we can 'roundtrip' perfectly
if DATAARRAY_NAME in dataset.attrs:
data_array.name = dataset.attrs[DATAARRAY_NAME]
del dataset.attrs[DATAARRAY_NAME]

if data_array.name == DATAARRAY_VARIABLE:
data_array.name = None

return data_array


class _MultiFileCloser(object):
def __init__(self, file_objs):
self.file_objs = file_objs
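
A short sketch of the single-variable restriction in open_dataarray
(hypothetical file name; the fallback follows the advice in the error message):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({'a': ('x', np.arange(3)),
                     'b': ('x', np.arange(3))})
    ds.to_netcdf('two_vars.nc')

    try:
        xr.open_dataarray('two_vars.nc')  # two data variables -> ValueError
    except ValueError:
        # fall back to open_dataset and select the variable explicitly
        da = xr.open_dataset('two_vars.nc')['a']
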
13 changes: 13 additions & 0 deletions xarray/core/common.py
@@ -578,6 +578,19 @@ def where(self, cond, other=None, drop=False):

return outobj._where(outcond)

def close(self):
"""Close any files linked to this object
"""
if self._file_obj is not None:
self._file_obj.close()
self._file_obj = None

def __enter__(self):
return self

def __exit__(self, exc_type, exc_value, traceback):
self.close()

# this has no runtime function - these are listed so IDEs know these methods
# are defined and don't warn on these operations
__lt__ = __le__ = __ge__ = __gt__ = __add__ = __sub__ = __mul__ = \
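
Because close(), __enter__ and __exit__ now live on this shared base class, a
DataArray returned by open_dataarray supports the same resource handling as a
Dataset. A sketch (reusing the hypothetical file from the earlier example):

    import xarray as xr

    # context manager: the underlying file handle is closed on exit
    with xr.open_dataarray('saved_da.nc') as da:
        total = da.sum()

    # or manage the handle explicitly; close() is a no-op once the
    # file object has been cleared
    da = xr.open_dataarray('saved_da.nc')
    da.load()   # read the data into memory before closing the file
    da.close()
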
76 changes: 76 additions & 0 deletions xarray/core/dataarray.py
@@ -221,8 +221,12 @@ def __init__(self, data, coords=None, dims=None, name=None,
self._variable = variable
self._coords = coords
self._name = name

self._file_obj = None

self._initialized = True


__default = object()

def _replace(self, variable=None, coords=None, name=__default):
@@ -1092,6 +1096,78 @@ def to_masked_array(self, copy=True):
isnull = pd.isnull(self.values)
return np.ma.MaskedArray(data=self.values, mask=isnull, copy=copy)

def to_netcdf(self, *args, **kwargs):
"""
Write DataArray contents to a netCDF file.

Parameters
----------
path : str, optional
Path to which to save this DataArray. If no path is provided, this
function returns the resulting netCDF file as a bytes object; in
this case, we need to use scipy.io.netcdf, which does not support
netCDF version 4 (the default format becomes NETCDF3_64BIT).
mode : {'w', 'a'}, optional
Write ('w') or append ('a') mode. If mode='w', any existing file at
this location will be overwritten.
format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'}, optional
File format for the resulting netCDF file:

* NETCDF4: Data is stored in an HDF5 file, using netCDF4 API
features.
* NETCDF4_CLASSIC: Data is stored in an HDF5 file, using only
netCDF 3 compatible API features.
* NETCDF3_64BIT: 64-bit offset version of the netCDF 3 file format,
which fully supports 2+ GB files, but is only compatible with
clients linked against netCDF version 3.6.0 or later.
* NETCDF3_CLASSIC: The classic netCDF 3 file format. It does not
handle 2+ GB files very well.

All formats are supported by the netCDF4-python library.
scipy.io.netcdf only supports the last two formats.

The default format is NETCDF4 if you are saving a file to disk and
have the netCDF4-python library available. Otherwise, xarray falls
back to using scipy to write netCDF files and defaults to the
NETCDF3_64BIT format (scipy does not support netCDF4).
group : str, optional
Path to the netCDF4 group in the given file to open (only works for
format='NETCDF4'). The group(s) will be created if necessary.
engine : {'netcdf4', 'scipy', 'h5netcdf'}, optional
Engine to use when writing netCDF files. If not provided, the
default engine is chosen based on available dependencies, with a
preference for 'netcdf4' if writing to a file on disk.
encoding : dict, optional
Nested dictionary with variable names as keys and dictionaries of
variable specific encodings as values, e.g.,
``{'my_variable': {'dtype': 'int16', 'scale_factor': 0.1, 'zlib': True}, ...}``

Notes
-----
Only xarray.Dataset objects can be written to netCDF files, so
the xarray.DataArray is converted to a xarray.Dataset object
containing a single variable. If the DataArray has no name, or if the
name is the same as a coordinate name, then it is given the name
'__xarray_dataarray_variable__'.

All parameters are passed directly to `xarray.Dataset.to_netcdf`.
"""
from ..backends.api import DATAARRAY_NAME, DATAARRAY_VARIABLE

if not self.name:
# If no name is set then use a generic xarray name
dataset = self.to_dataset(name=DATAARRAY_VARIABLE)
elif self.name in list(self.coords):
# The name matches one of the coordinate names, which a Dataset
# doesn't allow, so rename it but keep track of the old name
dataset = self.to_dataset(name=DATAARRAY_VARIABLE)
dataset.attrs[DATAARRAY_NAME] = self.name
else:
# No problems with the name - so we're fine!
dataset = self.to_dataset()

dataset.to_netcdf(*args, **kwargs)

def to_dict(self):
"""
Convert this xarray.DataArray into a dictionary following xarray
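
The name handling described in the to_netcdf notes can be seen end to end in a
sketch (hypothetical file name; the renamed variable and attribute are the
DATAARRAY_VARIABLE and DATAARRAY_NAME constants from backends/api.py):

    import numpy as np
    import xarray as xr

    # the array's name 'x' clashes with its coordinate name
    da = xr.DataArray(np.arange(3), dims=['x'],
                      coords={'x': [10, 20, 30]}, name='x')
    da.to_netcdf('clash.nc')

    # on disk, the variable is renamed and the real name kept in an attribute
    ds = xr.open_dataset('clash.nc')
    print(list(ds.data_vars))                     # ['__xarray_dataarray_variable__']
    print(ds.attrs['__xarray_dataarray_name__'])  # 'x'

    # open_dataarray undoes the renaming, restoring the original name
    assert xr.open_dataarray('clash.nc').identical(da)
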
13 changes: 0 additions & 13 deletions xarray/core/dataset.py
@@ -240,19 +240,6 @@ def load_store(cls, store, decoder=None):
obj._file_obj = store
return obj

def close(self):
"""Close any files linked to this dataset
"""
if self._file_obj is not None:
self._file_obj.close()
self._file_obj = None

def __enter__(self):
return self

def __exit__(self, exc_type, exc_value, traceback):
self.close()

def __getstate__(self):
"""Always load data in-memory before pickling"""
self.load()
39 changes: 37 additions & 2 deletions xarray/test/test_backends.py
@@ -13,7 +13,8 @@
import pandas as pd

import xarray as xr
from xarray import Dataset, open_dataset, open_mfdataset, backends, save_mfdataset
from xarray import (Dataset, DataArray, open_dataset, open_dataarray,
open_mfdataset, backends, save_mfdataset)
from xarray.backends.common import robust_getitem
from xarray.backends.netCDF4_ import _extract_nc4_encoding
from xarray.core.pycompat import iteritems, PY3
@@ -1071,7 +1072,6 @@ def test_extract_h5nc_encoding(self):
with self.assertRaisesRegexp(ValueError, 'unexpected encoding'):
_extract_nc4_encoding(var, raise_on_invalid=True)


class MiscObject:
pass

@@ -1172,3 +1172,38 @@ def new_dataset_and_coord_attrs():
attrs['test'] = np.arange(12).reshape(3, 4)
with create_tmp_file() as tmp_file:
ds.to_netcdf(tmp_file)

@requires_netCDF4
class TestDataArrayToNetCDF(TestCase):

def test_dataarray_to_netcdf_no_name(self):
original_da = DataArray(np.arange(12).reshape((3, 4)))

with create_tmp_file() as tmp:
original_da.to_netcdf(tmp)

with open_dataarray(tmp) as loaded_da:
self.assertDataArrayIdentical(original_da, loaded_da)


def test_dataarray_to_netcdf_with_name(self):
original_da = DataArray(np.arange(12).reshape((3, 4)),
name='test')

with create_tmp_file() as tmp:
original_da.to_netcdf(tmp)

with open_dataarray(tmp) as loaded_da:
self.assertDataArrayIdentical(original_da, loaded_da)


def test_dataarray_to_netcdf_coord_name_clash(self):
original_da = DataArray(np.arange(12).reshape((3, 4)),
dims=['x', 'y'],
name='x')

with create_tmp_file() as tmp:
original_da.to_netcdf(tmp)

with open_dataarray(tmp) as loaded_da:
self.assertDataArrayIdentical(original_da, loaded_da)