# Source code for arviz.plots.lmplot

"""Plot regression figure."""
import warnings
from numbers import Integral

import xarray as xr
import numpy as np
from xarray.core.dataarray import DataArray

from ..sel_utils import xarray_var_iter
from ..rcparams import rcParams
from .plot_utils import default_grid, filter_plotters_list, get_plotting_function

def plot_lm(
    y,
    idata=None,
    x=None,
    y_model=None,
    y_hat=None,
    num_samples=50,
    kind_pp="samples",
    kind_model="lines",
    xjitter=False,
    plot_dim=None,
    backend=None,
    y_kwargs=None,
    y_hat_plot_kwargs=None,
    y_hat_fill_kwargs=None,
    y_model_plot_kwargs=None,
    y_model_fill_kwargs=None,
    y_model_mean_kwargs=None,
    backend_kwargs=None,
    show=None,
    figsize=None,
    textsize=None,
    axes=None,
    legend=True,
    grid=True,
):
    """Posterior predictive and mean plots for regression-like data.

    Parameters
    ----------
    y : str or DataArray or ndarray
        If str, variable name from ``observed_data``.
    idata : InferenceData, Optional
        Optional only if ``y`` is not str (and no other argument is given as a
        variable name).
    x : str, tuple of strings, DataArray or array-like, optional
        If str or tuple, variable name from ``constant_data``.
        If ndarray, could be 1D, or 2D for multiple plots.
        If None, coords name of ``y`` (``y`` should be DataArray).
    y_model : str or Sequence, Optional
        If str, variable name from ``posterior``. Its dimensions should be same as ``y`` plus
        added chains and draws.
    y_hat : str, Optional
        If str, variable name from ``posterior_predictive``. Its dimensions should be same as
        ``y`` plus added chains and draws. If None and ``y`` is a str, the name given in ``y``
        is looked up in ``posterior_predictive``.
    num_samples : int, Optional, Default 50
        Significant if ``kind_pp`` is "samples" or ``kind_model`` is "lines".
        Number of samples to be drawn from posterior predictive or posterior.
    kind_pp : {"samples", "hdi"}, Default "samples"
        Options to visualize uncertainty in data.
    kind_model : {"lines", "hdi"}, Default "lines"
        Options to visualize uncertainty in mean of the data.
    xjitter : bool, Default False
        Forwarded unchanged to the backend plotting function (presumably toggles jittering
        of the ``x`` values; confirm against the backend implementation).
    plot_dim : str or sequence of str, Optional
        Necessary if ``y`` is multidimensional.
    backend : str, Optional
        Select plotting backend {"matplotlib","bokeh"}. Default "matplotlib".
    y_kwargs : dict, optional
        Passed to :meth:`mpl:matplotlib.axes.Axes.plot` in matplotlib and to the
        corresponding plotting call in bokeh
    y_hat_plot_kwargs : dict, optional
        Passed to :meth:`mpl:matplotlib.axes.Axes.plot` in matplotlib and to the
        corresponding plotting call in bokeh
    y_hat_fill_kwargs : dict, optional
        Passed to :func:`arviz.plot_hdi`
    y_model_plot_kwargs : dict, optional
        Passed to :meth:`mpl:matplotlib.axes.Axes.plot` in matplotlib and
        :meth:`bokeh:bokeh.plotting.Figure.line` in bokeh
    y_model_fill_kwargs : dict, optional
        Significant if ``kind_model`` is "hdi". Passed to :func:`arviz.plot_hdi`
    y_model_mean_kwargs : dict, optional
        Passed to :meth:`mpl:matplotlib.axes.Axes.plot` in matplotlib and
        :meth:`bokeh:bokeh.plotting.Figure.line` in bokeh
    backend_kwargs : dict, optional
        These are kwargs specific to the backend being used. Passed to
        :func:`matplotlib.pyplot.subplots` or
        :func:`bokeh.plotting.figure`.
    figsize : tuple, optional
        Figure size. If None it will be defined automatically.
    textsize : float, optional
        Text size scaling factor for labels, titles and lines. If None it will be
        autoscaled based on ``figsize``.
    axes : numpy array-like of matplotlib axes or bokeh figures, optional
        A 2D array of locations into which to plot the densities. If not supplied, Arviz will
        create its own array of plot areas (and return it).
    show : bool, optional
        Call backend show function.
    legend : bool, optional
        Add legend to figure. By default True.
    grid : bool, optional
        Add grid to figure. By default True.

    Returns
    -------
    axes: matplotlib axes or bokeh figures

    Raises
    ------
    ValueError
        If ``kind_pp`` or ``kind_model`` is not one of the accepted options, if a variable
        name is given but ``idata`` is None, or if ``y`` is multidimensional and
        ``plot_dim`` is None.
    TypeError
        If ``num_samples`` is not an integer between 1 and the number of available
        samples (chains times draws).

    See Also
    --------
    plot_ts : Plot timeseries data
    plot_ppc : Plot for posterior/prior predictive checks

    Examples
    --------
    Plot regression default plot

    .. plot::
        :context: close-figs

        >>> import arviz as az
        >>> import numpy as np
        >>> import xarray as xr
        >>> idata = az.load_arviz_data('regression1d')
        >>> x = xr.DataArray(np.linspace(0, 1, 100))
        >>> idata.posterior["y_model"] = idata.posterior["intercept"] + idata.posterior["slope"]*x
        >>> az.plot_lm(idata=idata, y="y", x=x)

    Plot regression data and mean uncertainty

    .. plot::
        :context: close-figs

        >>> az.plot_lm(idata=idata, y="y", x=x, y_model="y_model")

    Plot regression data and mean uncertainty in hdi form

    .. plot::
        :context: close-figs

        >>> az.plot_lm(
        ...     idata=idata, y="y", x=x, y_model="y_model", kind_pp="hdi", kind_model="hdi"
        ... )

    Plot regression data for multi-dimensional y using plot_dim

    .. plot::
        :context: close-figs

        >>> data = az.from_dict(
        ...     observed_data = { "y": np.random.normal(size=(5, 7)) },
        ...     posterior_predictive = {"y": np.random.randn(4, 1000, 5, 7) / 2},
        ...     dims={"y": ["dim1", "dim2"]},
        ...     coords={"dim1": range(5), "dim2": range(7)}
        ... )
        >>> az.plot_lm(idata=data, y="y", plot_dim="dim1")
    """
    if kind_pp not in ("samples", "hdi"):
        raise ValueError("kind_pp should be either samples or hdi")

    if kind_model not in ("lines", "hdi"):
        raise ValueError("kind_model should be either lines or hdi")

    # By default the posterior predictive variable shares its name with the observed one.
    if y_hat is None and isinstance(y, str):
        y_hat = y

    # Variable names can only be resolved through idata; fail early with a clear
    # message instead of an AttributeError on None further down.
    needs_idata = (
        isinstance(y, str)
        or isinstance(x, (str, tuple))
        or isinstance(y_model, str)
        or isinstance(y_hat, str)
    )
    if idata is None and needs_idata:
        raise ValueError(
            "Argument idata is required when y, x, y_model or y_hat are given as variable names"
        )

    if isinstance(y, str):
        y = idata.observed_data[y]
    elif not isinstance(y, DataArray):
        y = xr.DataArray(y)

    if len(y.dims) > 1 and plot_dim is None:
        raise ValueError("Argument plot_dim is needed in case of multidimensional data")

    x_var_names = None
    if isinstance(x, str):
        x = idata.constant_data[x]
        x_skip_dims = x.dims
    elif isinstance(x, tuple):
        # Several variable names: iterate over the whole constant_data group,
        # restricted to the requested variables.
        x_var_names = x
        x = idata.constant_data
        x_skip_dims = x.dims
    elif isinstance(x, DataArray):
        x_skip_dims = x.dims
    elif x is None:
        x = y.coords[y.dims[0]] if plot_dim is None else y.coords[plot_dim]
        x_skip_dims = x.dims
    else:
        # Raw array-like: only the last dimension is kept within a plot, so a
        # 2D input yields one plotter per leading index.
        x = xr.DataArray(x)
        x_skip_dims = [x.dims[-1]]

    # If posterior is present in idata and y_model is there, get its values
    if isinstance(y_model, str):
        if "posterior" not in idata.groups():
            warnings.warn("Posterior not found in idata", UserWarning)
            y_model = None
        elif hasattr(idata.posterior, y_model):
            y_model = idata.posterior[y_model]
        else:
            warnings.warn("y_model not found in posterior", UserWarning)
            y_model = None

    # If posterior_predictive is present in idata and y_hat is there, get its values
    if isinstance(y_hat, str):
        if "posterior_predictive" not in idata.groups():
            warnings.warn("posterior_predictive not found in idata", UserWarning)
            y_hat = None
        elif hasattr(idata.posterior_predictive, y_hat):
            y_hat = idata.posterior_predictive[y_hat]
        else:
            warnings.warn("y_hat not found in posterior_predictive", UserWarning)
            y_hat = None

    # Check that num_samples is valid and generate num_samples random indexes.
    # Only needed if kind_pp="samples" or kind_model="lines". Not required for plotting hdi
    pp_sample_ix = None
    if (y_hat is not None and kind_pp == "samples") or (
        y_model is not None and kind_model == "lines"
    ):
        if y_hat is not None:
            total_pp_samples = y_hat.sizes["chain"] * y_hat.sizes["draw"]
        else:
            total_pp_samples = y_model.sizes["chain"] * y_model.sizes["draw"]

        if (
            not isinstance(num_samples, Integral)
            or num_samples < 1
            or num_samples > total_pp_samples
        ):
            raise TypeError(f"`num_samples` must be an integer between 1 and {total_pp_samples}.")

        pp_sample_ix = np.random.choice(total_pp_samples, size=num_samples, replace=False)

    # crucial step in case of multidim y
    if plot_dim is None:
        skip_dims = list(y.dims)
    elif isinstance(plot_dim, str):
        skip_dims = [plot_dim]
    else:
        # Any other iterable of dimension names (tuple, list, ...). Previously only
        # tuples were handled and other sequences left skip_dims undefined.
        skip_dims = list(plot_dim)

    # Generate x axis plotters.
    x = filter_plotters_list(
        plotters=list(
            xarray_var_iter(
                x,
                var_names=x_var_names,
                skip_dims=set(x_skip_dims),
                combined=True,
            )
        ),
        plot_kind="plot_lm",
    )

    # Generate y axis plotters
    y = filter_plotters_list(
        plotters=list(
            xarray_var_iter(
                y,
                skip_dims=set(skip_dims),
                combined=True,
            )
        ),
        plot_kind="plot_lm",
    )

    # If there are multiple x and multidimensional y, we need total of len(x)*len(y) graphs
    len_y = len(y)
    len_x = len(x)
    length_plotters = len_x * len_y
    y = np.tile(y, (len_x, 1))
    x = np.tile(x, (len_y, 1))

    # Filter out the required values to generate plotters
    if y_hat is not None:
        if kind_pp == "samples":
            # Flatten chain/draw into one sample dimension and keep the random subset.
            y_hat = y_hat.stack(__sample__=("chain", "draw"))[..., pp_sample_ix]
            skip_dims += ["__sample__"]

        # zip with range(len_y) caps the number of plotters at len_y.
        y_hat = [
            tup
            for _, tup in zip(
                range(len_y),
                xarray_var_iter(
                    y_hat,
                    skip_dims=set(skip_dims),
                    combined=True,
                ),
            )
        ]

        y_hat = np.tile(y_hat, (len_x, 1))

    # Filter out the required values to generate plotters
    if y_model is not None:
        if kind_model == "lines":
            y_model = y_model.stack(__sample__=("chain", "draw"))[..., pp_sample_ix]

        y_model = [
            tup
            for _, tup in zip(
                range(len_y),
                xarray_var_iter(
                    y_model,
                    skip_dims=set(y_model.dims),
                    combined=True,
                ),
            )
        ]
        y_model = np.tile(y_model, (len_x, 1))

    rows, cols = default_grid(length_plotters)

    lmplot_kwargs = dict(
        x=x,
        y=y,
        y_model=y_model,
        y_hat=y_hat,
        num_samples=num_samples,
        kind_pp=kind_pp,
        kind_model=kind_model,
        length_plotters=length_plotters,
        xjitter=xjitter,
        rows=rows,
        cols=cols,
        y_kwargs=y_kwargs,
        y_hat_plot_kwargs=y_hat_plot_kwargs,
        y_hat_fill_kwargs=y_hat_fill_kwargs,
        y_model_plot_kwargs=y_model_plot_kwargs,
        y_model_fill_kwargs=y_model_fill_kwargs,
        y_model_mean_kwargs=y_model_mean_kwargs,
        backend_kwargs=backend_kwargs,
        show=show,
        figsize=figsize,
        textsize=textsize,
        axes=axes,
        legend=legend,
        grid=grid,
    )

    if backend is None:
        backend = rcParams["plot.backend"]
    backend = backend.lower()

    # Dispatch to the backend-specific implementation of plot_lm.
    plot = get_plotting_function("plot_lm", "lmplot", backend)
    ax = plot(**lmplot_kwargs)
    return ax