The docs on this page refer to a PREVIOUS VERSION. For the latest stable release, go to https://docs.bokeh.org/

Archived docs for versions <= 1.0.4 have had to be modified from their original published configuration, and may be missing some features (e.g., source listings).

All users are encouraged to update to version 1.1 or later as soon as they are able.

Bokeh Docs

boxplot

< elements | back to Gallery | logaxis >

                import numpy as np
                import pandas as pd
                
                from bokeh.plotting import figure, show, output_file
                
                # generate some synthetic scores for six different categories
                cats = list("abcdef")
                yy = np.random.randn(2000)
                g = np.random.choice(cats, 2000)
                # shift each category's scores up by half its position, rounded down
                # (a, b: +0; c, d: +1; e, f: +2) so the groups are visibly separated
                for position, label in enumerate(cats):
                    members = g == label
                    yy[members] += position // 2
                df = pd.DataFrame({"score": yy, "group": g})
                
                # per-category quartiles and the interquartile range derived from them
                groups = df.groupby('group')
                q1, q2, q3 = (groups.quantile(q=level) for level in (0.25, 0.5, 0.75))
                iqr = q3 - q1
                # outlier thresholds sit 1.5 IQRs beyond the first and third quartiles
                lower = q1 - iqr * 1.5
                upper = q3 + iqr * 1.5
                
                # find the outliers for each category
                def outliers(group):
                    """Return the scores of *group* that lie outside its outlier thresholds.

                    Intended to be passed to ``groups.apply``; the category label is read
                    from ``group.name`` and used to look up that category's thresholds in
                    the module-level ``upper`` and ``lower`` frames.
                    """
                    cat = group.name
                    # Look the thresholds up by label. The previous ``.loc[cat][0]`` relied
                    # on an integer key being treated positionally on a label-indexed
                    # Series, which recent pandas versions deprecate.
                    high = upper.loc[cat, 'score']
                    low = lower.loc[cat, 'score']
                    return group[(group.score > high) | (group.score < low)]['score']
                out = groups.apply(outliers).dropna()
                
                # prepare outlier data for plotting, we need coordinates for every outlier.
                outx = []
                outy = []
                # Read the category of every outlier straight from the outer level of the
                # index instead of looking each category up with ``out.loc[cat]``: that
                # lookup can raise KeyError for a category that produced no outliers,
                # before any emptiness check gets a chance to run.
                if not out.empty:
                    outx = out.index.get_level_values(0).tolist()
                    outy = out.tolist()
                
                p = figure(
                    tools="save",
                    background_fill_color="#EFE8E2",
                    title="",
                    x_range=cats,
                )

                # a stem should never reach past the data: cap each threshold at the
                # smallest / largest value actually observed in its category
                qmin = groups.quantile(q=0.00)
                qmax = groups.quantile(q=1.00)
                upper["score"] = [min(peak, fence) for peak, fence in zip(qmax.iloc[:, 0], upper["score"])]
                lower["score"] = [max(trough, fence) for trough, fence in zip(qmin.iloc[:, 0], lower["score"])]
                
                # stems: from each quartile edge out to the corresponding whisker end
                stem_style = dict(line_width=2, line_color="black")
                p.segment(cats, upper.score, cats, q3.score, **stem_style)
                p.segment(cats, lower.score, cats, q1.score, **stem_style)

                # boxes: one rect spanning median..Q3 and one spanning Q1..median
                box_width = 0.7
                box_outline = dict(line_width=2, line_color="black")
                p.rect(cats, (q3.score + q2.score) / 2, box_width, q3.score - q2.score,
                       fill_color="#E08E79", **box_outline)
                p.rect(cats, (q2.score + q1.score) / 2, box_width, q2.score - q1.score,
                       fill_color="#3B8686", **box_outline)

                # whiskers (almost-0 height rects simpler than segments)
                for fence in (lower, upper):
                    p.rect(cats, fence.score, 0.2, 0.01, line_color="black")

                # outliers
                p.circle(outx, outy, size=6, color="#F38630", fill_alpha=0.6)

                # grid and axis styling
                p.xgrid.grid_line_color = None
                p.ygrid.grid_line_color = "white"
                p.grid.grid_line_width = 2
                p.xaxis.major_label_text_font_size = "12pt"

                output_file("boxplot.html", title="boxplot.py example")

                show(p)