Source code for arviz.plots.rankplot

"""Histograms of ranked posterior draws, plotted for each chain."""
from itertools import cycle

import matplotlib.pyplot as plt

from ..data import convert_to_dataset
from ..labels import BaseLabeller
from ..sel_utils import xarray_var_iter
from ..rcparams import rcParams
from ..stats.density_utils import _sturges_formula
from ..utils import _var_names
from .plot_utils import default_grid, filter_plotters_list, get_plotting_function


def plot_rank(
    data,
    var_names=None,
    filter_vars=None,
    transform=None,
    coords=None,
    bins=None,
    kind="bars",
    colors="cycle",
    ref_line=True,
    labels=True,
    labeller=None,
    grid=None,
    figsize=None,
    ax=None,
    backend=None,
    ref_line_kwargs=None,
    bar_kwargs=None,
    vlines_kwargs=None,
    marker_vlines_kwargs=None,
    backend_kwargs=None,
    show=None,
):
    """Plot rank order statistics of chains.

    From the paper: Rank plots are histograms of the ranked posterior draws (ranked over all
    chains) plotted separately for each chain.
    If all of the chains are targeting the same posterior, we expect the ranks in each chain to be
    uniform, whereas if one chain has a different location or scale parameter, this will be
    reflected in the deviation from uniformity. If rank plots of all chains look similar, this
    indicates good mixing of the chains.

    This plot was introduced by Aki Vehtari, Andrew Gelman, Daniel Simpson, Bob Carpenter,
    Paul-Christian Burkner (2019): Rank-normalization, folding, and localization: An improved R-hat
    for assessing convergence of MCMC. arXiv preprint https://arxiv.org/abs/1903.08008

    Parameters
    ----------
    data: obj
        Any object that can be converted to an :class:`arviz.InferenceData` object.
        Refer to documentation of :func:`arviz.convert_to_dataset` for details
    var_names: string or list of variable names
        Variables to be plotted. Prefix the variables by ``~`` when you want to exclude
        them from the plot.
    filter_vars: {None, "like", "regex"}, optional, default=None
        If `None` (default), interpret var_names as the real variables names. If "like",
        interpret var_names as substrings of the real variables names. If "regex",
        interpret var_names as regular expressions on the real variables names. A la
        ``pandas.filter``.
    transform: callable
        Function to transform data (defaults to None, i.e. the identity function).
        It is applied to ``data`` before the conversion to a dataset.
    coords: mapping, optional
        Coordinates of var_names to be plotted. Passed to :meth:`xarray.Dataset.sel`
    bins: None or passed to np.histogram
        Binning strategy used for histogram. By default uses twice the result of Sturges' formula.
        See :func:`numpy.histogram` documentation for other available arguments.
    kind: string
        If bars (defaults), ranks are represented as stacked histograms (one per chain). If vlines
        ranks are represented as vertical lines above or below ``ref_line``.
    colors: string or list of strings
        List with valid matplotlib colors, one color per chain. Alternatively a string can be
        passed. If the string is `cycle`, it will automatically choose a color per chain from
        matplotlib's cycle. If a single color is passed, e.g. 'k', 'C2' or 'red' this color will
        be used for all chains. Defaults to `cycle`.
    ref_line: boolean
        Whether to include a dashed line showing where a uniform distribution would lie
    labels: bool
        whether to plot or not the x and y labels, defaults to True
    labeller : labeller instance, optional
        Class providing the method ``make_label_vert`` to generate the labels in the plot titles.
        Read the :ref:`label_guide` for more details and usage examples.
    grid : tuple
        Number of rows and columns. Defaults to None, the rows and columns are
        automatically inferred.
    figsize: tuple
        Figure size. If None it will be defined automatically.
    ax: numpy array-like of matplotlib axes or bokeh figures, optional
        A 2D array of locations into which to plot the densities. If not supplied, ArviZ will
        create its own array of plot areas (and return it).
    backend: str, optional
        Select plotting backend {"matplotlib","bokeh"}. Default "matplotlib".
    ref_line_kwargs : dict, optional
        Reference line keyword arguments, passed to :meth:`mpl:matplotlib.axes.Axes.axhline` or
        :class:`bokeh:bokeh.models.Span`.
    bar_kwargs : dict, optional
        Bars keyword arguments, passed to :meth:`mpl:matplotlib.axes.Axes.bar` or
        :meth:`bokeh:bokeh.plotting.Figure.vbar`.
    vlines_kwargs : dict, optional
        Vlines keyword arguments, passed to :meth:`mpl:matplotlib.axes.Axes.vlines` or
        :meth:`bokeh:bokeh.plotting.Figure.multi_line`.
    marker_vlines_kwargs : dict, optional
        Marker for the vlines keyword arguments, passed to :meth:`mpl:matplotlib.axes.Axes.plot` or
        :meth:`bokeh:bokeh.plotting.Figure.circle`.
    backend_kwargs: dict, optional
        These are kwargs specific to the backend being used, passed to
        :func:`matplotlib.pyplot.subplots` or
        :func:`bokeh.plotting.figure`. For additional documentation
        check the plotting method of the backend.
    show: bool, optional
        Call backend show function.

    Returns
    -------
    axes: matplotlib axes or bokeh figures

    See Also
    --------
    plot_trace : Plot distribution (histogram or kernel density estimates) and
                 sampled values or rank plot.

    Examples
    --------
    Show a default rank plot

    .. plot::
        :context: close-figs

        >>> import arviz as az
        >>> data = az.load_arviz_data('centered_eight')
        >>> az.plot_rank(data)

    Recreate Figure 13 from the arxiv preprint

    .. plot::
        :context: close-figs

        >>> data = az.load_arviz_data('centered_eight')
        >>> az.plot_rank(data, var_names='tau')

    Use vlines to compare results for centered vs noncentered models

    .. plot::
        :context: close-figs

        >>> import matplotlib.pyplot as plt
        >>> centered_data = az.load_arviz_data('centered_eight')
        >>> noncentered_data = az.load_arviz_data('non_centered_eight')
        >>> _, ax = plt.subplots(1, 2, figsize=(12, 3))
        >>> az.plot_rank(centered_data, var_names="mu", kind='vlines', ax=ax[0])
        >>> az.plot_rank(noncentered_data, var_names="mu", kind='vlines', ax=ax[1])

    Change the aesthetics using kwargs

    .. plot::
        :context: close-figs

        >>> az.plot_rank(noncentered_data, var_names="mu", kind="vlines",
        ...              vlines_kwargs={'lw':0}, marker_vlines_kwargs={'lw':3});
    """
    # The user transform runs on the raw input, before it is turned into a dataset.
    if transform is not None:
        data = transform(data)
    posterior_data = convert_to_dataset(data, group="posterior")

    if coords is not None:
        posterior_data = posterior_data.sel(**coords)
    var_names = _var_names(var_names, posterior_data, filter_vars)
    plotters = filter_plotters_list(
        list(
            xarray_var_iter(
                posterior_data,
                var_names=var_names,
                combined=True,
            )
        ),
        "plot_rank",
    )
    length_plotters = len(plotters)

    if bins is None:
        bins = _sturges_formula(posterior_data, mult=2)

    if labeller is None:
        labeller = BaseLabeller()

    rows, cols = default_grid(length_plotters, grid=grid)

    chains = len(posterior_data.chain)
    # Check the type before comparing with "cycle": comparing an array-like of
    # colors against a string is elementwise and cannot be used as a condition.
    if isinstance(colors, str):
        if colors == "cycle":
            palette = plt.rcParams["axes.prop_cycle"].by_key()["color"]
            # Repeat the palette as needed so that every chain gets a color.
            colors = [color for _, color in zip(range(chains), cycle(palette))]
        else:
            # A single color name is shared by all chains.
            colors = [colors] * chains

    rankplot_kwargs = dict(
        axes=ax,
        length_plotters=length_plotters,
        rows=rows,
        cols=cols,
        figsize=figsize,
        plotters=plotters,
        bins=bins,
        kind=kind,
        colors=colors,
        ref_line=ref_line,
        labels=labels,
        labeller=labeller,
        ref_line_kwargs=ref_line_kwargs,
        bar_kwargs=bar_kwargs,
        vlines_kwargs=vlines_kwargs,
        marker_vlines_kwargs=marker_vlines_kwargs,
        backend_kwargs=backend_kwargs,
        show=show,
    )

    if backend is None:
        backend = rcParams["plot.backend"]
    backend = backend.lower()

    # TODO: Add backend kwargs
    plot = get_plotting_function("plot_rank", "rankplot", backend)
    axes = plot(**rankplot_kwargs)
    return axes