Source code for pygmt.src.grd2xyz

"""
grd2xyz - Convert grid to data table
"""
import warnings

import pandas as pd
import xarray as xr
from pygmt.clib import Session
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import (
    GMTTempFile,
    build_arg_string,
    fmt_docstring,
    kwargs_to_strings,
    use_alias,
)

__doctest_skip__ = ["grd2xyz"]


[docs]@fmt_docstring
@use_alias(
    C="cstyle",
    R="region",
    V="verbose",
    W="weight",
    Z="convention",
    b="binary",
    d="nodata",
    f="coltypes",
    h="header",
    o="outcols",
    s="skiprows",
)
@kwargs_to_strings(R="sequence", o="sequence_comma")
def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs):
    r"""
    Convert grid to data table.

    Read a grid and output xyz-triplets as a :class:`numpy.ndarray`,
    :class:`pandas.DataFrame`, or ASCII file.

    Full option list at :gmt-docs:`grd2xyz.html`

    {aliases}

    Parameters
    ----------
    grid : str or xarray.DataArray
        The file name of the input grid or the grid loaded as a
        :class:`xarray.DataArray`. This is the only required parameter.
    output_type : str
        Determine the format the xyz data will be returned in [Default is
        ``pandas``]:

            - ``numpy`` - :class:`numpy.ndarray`
            - ``pandas``- :class:`pandas.DataFrame`
            - ``file`` - ASCII file (requires ``outfile``)
    outfile : str
        The file name for the output ASCII file.
    cstyle : str
        [**f**\|\ **i**].
        Replace the x- and y-coordinates on output with the corresponding
        column and row numbers. These start at 0 (C-style counting); append
        **f** to start at 1 (Fortran-style counting). Alternatively, append
        **i** to write just the two columns *index* and *z*, where *index*
        is the 1-D indexing that GMT uses when referring to grid nodes.
    {R}
        Adding ``region`` will select a subsection of the grid. If this
        subsection exceeds the boundaries of the grid, only the common region
        will be output.
    weight : str
        [**a**\ [**+u**\ *unit*]\|\ *weight*].
        Write out *x,y,z,w*\ , where *w* is the supplied *weight* (or 1 if not
        supplied) [Default writes *x,y,z* only].  Choose **a** to compute
        weights equal to the area each node represents.  For Cartesian grids
        this is simply the product of the *x* and *y* increments (except for
        gridline-registered grids at all sides [half] and corners [quarter]).
        For geographic grids we default to a length unit of **k**. Change
        this by appending **+u**\ *unit*. For such grids, the area
        varies with latitude and also sees special cases for
        gridline-registered layouts at sides, corners, and poles.
    {V}
    convention : str
        [*flags*].
        Write a 1-column ASCII [or binary] table. Output will be organized
        according to the specified ordering convention contained in *flags*.
        If data should be written by rows, make *flags* start with
        **T** (op) if first row is y = ymax or
        **B** (ottom) if first row is y = ymin. Then,
        append **L** or **R** to indicate that first element should start at
        left or right end of row. Likewise for column formats: start with
        **L** or **R** to position first column, and then append **T** or
        **B** to position first element in a row. For gridline registered
        grids: If grid is periodic in x but the written data should not
        contain the (redundant) column at x = xmax, append **x**. For grid
        periodic in y, skip writing the redundant row at y = ymax by
        appending **y**. If the byte-order needs to be swapped, append
        **w**. Select one of several data types (all binary except **a**):

        * **a** ASCII representation of a single item per record
        * **c** int8_t, signed 1-byte character
        * **u** uint8_t, unsigned 1-byte character
        * **h** int16_t, short 2-byte integer
        * **H** uint16_t, unsigned short 2-byte integer
        * **i** int32_t, 4-byte integer
        * **I** uint32_t, unsigned 4-byte integer
        * **l** int64_t, long (8-byte) integer
        * **L** uint64_t, unsigned long (8-byte) integer
        * **f** 4-byte floating point single precision
        * **d** 8-byte floating point double precision

        Default format is scanline orientation of ASCII numbers: **TLa**.
    {b}
    {d}
    {f}
    {h}
    {o}
    {s}

    Returns
    -------
    ret : pandas.DataFrame or numpy.ndarray or None
        Return type depends on ``outfile`` and ``output_type``:

        - None if ``outfile`` is set (output will be stored in file set by
          ``outfile``)
        - :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is
          not set (depends on ``output_type``)

    Example
    -------
    >>> import pygmt
    >>> # Load a grid of @earth_relief_30m data, with an x-range of 10 to 30,
    >>> # and a y-range of 15 to 25
    >>> grid = pygmt.datasets.load_earth_relief(
    ...     resolution="30m", region=[10, 30, 15, 25]
    ... )
    >>> # Create a pandas DataFrame with the xyz data from an input grid
    >>> xyz_dataframe = pygmt.grd2xyz(grid=grid, output_type="pandas")
    >>> xyz_dataframe.head(n=2)
         lon    lat  elevation
    0  10.25  24.75      903.5
    1  10.75  24.75      820.0
    """
    if output_type not in ["numpy", "pandas", "file"]:
        raise GMTInvalidInput(
            "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'."
        )

    if outfile is not None and output_type != "file":
        msg = (
            f"Changing 'output_type' of grd2xyz from '{output_type}' to 'file' "
            "since 'outfile' parameter is set. Please use output_type='file' "
            "to silence this warning."
        )
        warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
        output_type = "file"
    elif outfile is None and output_type == "file":
        raise GMTInvalidInput("Must specify 'outfile' for ASCII output.")

    if "o" in kwargs and output_type == "pandas":
        raise GMTInvalidInput(
            "If 'outcols' is specified, 'output_type' must be either 'numpy'"
            "or 'file'."
        )

    # Set the default column names for the pandas dataframe header
    dataframe_header = ["x", "y", "z"]
    # Let output pandas column names match input DataArray dimension names
    if isinstance(grid, xr.DataArray) and output_type == "pandas":
        # Reverse the dims because it is rows, columns ordered.
        dataframe_header = [grid.dims[1], grid.dims[0], grid.name]

    with GMTTempFile() as tmpfile:
        with Session() as lib:
            file_context = lib.virtualfile_from_data(check_kind="raster", data=grid)
            with file_context as infile:
                if outfile is None:
                    outfile = tmpfile.name
                lib.call_module(
                    "grd2xyz", build_arg_string(kwargs, infile=infile, outfile=outfile)
                )

        # Read temporary csv output to a pandas table
        if outfile == tmpfile.name:  # if user did not set outfile, return pd.DataFrame
            result = pd.read_csv(
                tmpfile.name, sep="\t", names=dataframe_header, comment=">"
            )
        elif outfile != tmpfile.name:  # return None if outfile set, output in outfile
            result = None

        if output_type == "numpy":
            result = result.to_numpy()
    return result