# Source code for arviz.plots.ecdfplot

"""Plot ecdf or ecdf-difference plot with confidence bands."""
import numpy as np
from scipy.stats import uniform

from ..rcparams import rcParams
from ..stats.ecdf_utils import compute_ecdf, ecdf_confidence_band, _get_ecdf_points
from .plot_utils import get_plotting_function


def plot_ecdf(
    values,
    values2=None,
    cdf=None,
    difference=False,
    pit=False,
    confidence_bands=None,
    pointwise=False,
    npoints=100,
    num_trials=500,
    fpr=0.05,
    figsize=None,
    fill_band=True,
    plot_kwargs=None,
    fill_kwargs=None,
    plot_outline_kwargs=None,
    ax=None,
    show=None,
    backend=None,
    backend_kwargs=None,
    **kwargs,
):
    r"""Plot ECDF or ECDF-Difference Plot with Confidence bands.

    Plots of the empirical CDF estimates of an array. When `values2` argument is provided,
    the two empirical CDFs are overlaid with the distribution of `values` on top
    (in a darker shade) and confidence bands in a more transparent shade. Optionally, the
    difference between the two empirical CDFs can be computed, and the PIT for a single
    dataset or a comparison between two samples.

    Notes
    -----
    This plot computes the confidence bands with the simulation-based algorithm
    presented in [1]_.

    The input arrays are never modified: `values` and `values2` are flattened and
    sorted into fresh copies before use.

    Parameters
    ----------
    values : array-like
        Values to plot from an unknown continuous or discrete distribution.
    values2 : array-like, optional
        Values to compare to the original sample.
    cdf : callable, optional
        Cumulative distribution function of the distribution to compare the original sample.
        The function must take as input a numpy array of draws from the distribution.
    difference : bool, default False
        If True then plot ECDF-difference plot otherwise ECDF plot.
    pit : bool, default False
        If True plots the ECDF or ECDF-diff of PIT of sample.
    confidence_bands : bool, optional
        If True plots the simultaneous or pointwise confidence bands with `1 - fpr`
        confidence level. If None (the default), bands are drawn whenever `values2`
        or `cdf` is provided.
    pointwise : bool, default False
        If True plots pointwise confidence bands otherwise simultaneous bands.
    npoints : int, default 100
        This denotes the granularity size of our plot i.e the number of evaluation points
        for the ecdf or ecdf-difference plots.
    num_trials : int, default 500
        The number of random ECDFs to generate for constructing simultaneous confidence bands.
    fpr : float, default 0.05
        The type I error rate s.t `1 - fpr` denotes the confidence level of bands.
    figsize : (float,float), optional
        Figure size. If `None` it will be defined automatically.
    fill_band : bool, default True
        If True it fills in between to mark the area inside the confidence interval.
        Otherwise, plot the border lines.
    plot_kwargs : dict, optional
        Additional kwargs passed to :func:`mpl:matplotlib.pyplot.step` or
        :meth:`bokeh.plotting.figure.step`
    fill_kwargs : dict, optional
        Additional kwargs passed to :func:`mpl:matplotlib.pyplot.fill_between` or
        :meth:`bokeh:bokeh.plotting.Figure.varea`
    plot_outline_kwargs : dict, optional
        Additional kwargs passed to :meth:`mpl:matplotlib.axes.Axes.plot` or
        :meth:`bokeh:bokeh.plotting.Figure.line`
    ax : axes, optional
        Matplotlib axes or bokeh figures.
    show : bool, optional
        Call backend show function.
    backend : {"matplotlib", "bokeh"}, default "matplotlib"
        Select plotting backend.
    backend_kwargs : dict, optional
        These are kwargs specific to the backend being used, passed to
        :func:`matplotlib.pyplot.subplots` or :class:`bokeh.plotting.figure`.
        For additional documentation check the plotting method of the backend.
    **kwargs
        Forwarded unchanged to the backend plotting function.

    Returns
    -------
    axes : matplotlib_axes or bokeh_figure

    Raises
    ------
    ValueError
        If both `cdf` and `values2` are given, or if `confidence_bands`, `pit` or
        `difference` is requested without either `cdf` or `values2`.

    References
    ----------
    .. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A., 2021. Graphical Test for
        Discrete Uniformity and its Applications in Goodness of Fit Evaluation and
        Multiple Sample Comparison. arXiv preprint arXiv:2103.10522.

    Examples
    --------
    Plot ecdf plot for a given sample

    .. plot::
        :context: close-figs

        >>> import arviz as az
        >>> from scipy.stats import uniform, binom, norm

        >>> sample = norm(0,1).rvs(1000)
        >>> az.plot_ecdf(sample)

    Plot ecdf plot with confidence bands for comparing a given sample w.r.t a given distribution

    .. plot::
        :context: close-figs

        >>> distribution = norm(0,1)
        >>> az.plot_ecdf(sample, cdf = distribution.cdf, confidence_bands = True)

    Plot ecdf-difference plot with confidence bands for comparing a given sample
    w.r.t a given distribution

    .. plot::
        :context: close-figs

        >>> az.plot_ecdf(sample, cdf = distribution.cdf,
        ...              confidence_bands = True, difference = True)

    Plot ecdf plot with confidence bands for PIT of sample for comparing a given sample
    w.r.t a given distribution

    .. plot::
        :context: close-figs

        >>> az.plot_ecdf(sample, cdf = distribution.cdf,
        ...              confidence_bands = True, pit = True)

    Plot ecdf-difference plot with confidence bands for PIT of sample for comparing a given
    sample w.r.t a given distribution

    .. plot::
        :context: close-figs

        >>> az.plot_ecdf(sample, cdf = distribution.cdf,
        ...              confidence_bands = True, difference = True, pit = True)

    You could also plot the above w.r.t another sample rather than a given distribution.
    For eg: Plot ecdf-difference plot with confidence bands for PIT of sample for
    comparing a given sample w.r.t a given sample

    .. plot::
        :context: close-figs

        >>> sample2 = norm(0,1).rvs(5000)
        >>> az.plot_ecdf(sample, sample2, confidence_bands = True, difference = True, pit = True)

    """
    has_reference = values2 is not None or cdf is not None

    # Bands only make sense against a reference, so enable them by default
    # exactly when a reference (second sample or cdf) was supplied.
    if confidence_bands is None:
        confidence_bands = has_reference

    # Flags are tested by truthiness (not ``is True``) so that values such as
    # ``np.True_`` are validated here instead of failing obscurely further down.
    if not has_reference and confidence_bands:
        raise ValueError("For confidence bands you need to specify values2 or the cdf")

    if cdf is not None and values2 is not None:
        raise ValueError("To compare sample you need either cdf or values2 and not both")

    if not has_reference and pit:
        raise ValueError("For PIT specify either cdf or values2")

    if not has_reference and difference:
        raise ValueError("For ECDF difference plot need either cdf or values2")

    # ``np.ravel`` may return a view of the caller's array, so sort into a new
    # array with ``np.sort`` rather than sorting that view in place.
    if values2 is not None:
        values2 = np.sort(np.ravel(values2))
    values = np.sort(np.ravel(values))

    if pit:
        # PIT values live on (0, 1]; the reference CDF on that grid is the
        # identity, and band simulation draws from Uniform(0, 1).
        eval_points = np.linspace(1 / npoints, 1, npoints)
        if cdf is not None:
            sample = cdf(values)
        else:
            # NOTE(review): the result of compute_ecdf is divided by
            # len(values2) here but used undivided in the non-PIT branch
            # below -- confirm which normalisation compute_ecdf applies.
            sample = compute_ecdf(values2, values) / len(values2)
        cdf_at_eval_points = eval_points
        rvs = uniform(0, 1).rvs
    else:
        # Evaluate on an even grid spanning the observed range of ``values``.
        eval_points = np.linspace(values[0], values[-1], npoints)
        sample = values
        if confidence_bands or difference:
            if cdf is not None:
                cdf_at_eval_points = cdf(eval_points)
            else:
                cdf_at_eval_points = compute_ecdf(values2, eval_points)
        else:
            # No reference is needed; use a placeholder of matching shape.
            cdf_at_eval_points = np.zeros_like(eval_points)
        rvs = None

    x_coord, y_coord = _get_ecdf_points(sample, eval_points, difference)

    if difference:
        y_coord -= cdf_at_eval_points

    if confidence_bands:
        ndraws = len(values)
        band_kwargs = {
            "prob": 1 - fpr,
            "num_trials": num_trials,
            "rvs": rvs,
            "random_state": None,
            "method": "pointwise" if pointwise else "simulated",
        }
        lower, higher = ecdf_confidence_band(ndraws, eval_points, cdf_at_eval_points, **band_kwargs)

        if difference:
            # Express the bands relative to the reference CDF as well.
            lower -= cdf_at_eval_points
            higher -= cdf_at_eval_points
    else:
        lower, higher = None, None

    ecdf_plot_args = dict(
        x_coord=x_coord,
        y_coord=y_coord,
        x_bands=eval_points,
        lower=lower,
        higher=higher,
        confidence_bands=confidence_bands,
        figsize=figsize,
        fill_band=fill_band,
        plot_kwargs=plot_kwargs,
        fill_kwargs=fill_kwargs,
        plot_outline_kwargs=plot_outline_kwargs,
        ax=ax,
        show=show,
        backend_kwargs=backend_kwargs,
        **kwargs,
    )

    if backend is None:
        backend = rcParams["plot.backend"]
    backend = backend.lower()

    plot = get_plotting_function("plot_ecdf", "ecdfplot", backend)
    ax = plot(**ecdf_plot_args)

    return ax