Bachelors degrees by gender

Download this notebook from GitHub (right-click to download).


URL: http://matplotlib.org/examples/showcase/bachelors_degrees_by_gender.html

Most examples work equivalently across multiple plotting backends; this example is also available for:

In [1]:
import holoviews as hv
from holoviews import opts
# Load the matplotlib plotting extension for HoloViews
hv.extension('matplotlib')
# Request SVG as the figure output format
hv.output(fig='svg')

Define data

In [2]:
import pandas as pd
from matplotlib.cbook import get_sample_data

# Ask for the sample file's path rather than an open file object: with the
# default asfileobj=True the returned handle was never closed. Given a path,
# pd.read_csv opens and closes the file itself.
fname = get_sample_data('percent_bachelors_degrees_women_usa.csv',
                        asfileobj=False)
gender_degree_data = pd.read_csv(fname)

# Plot title; the trailing newline is part of the string.
title = (
    "Percentage of Bachelor's degrees conferred to women in "
    "the U.S.A. by major (1970-2011)\n"
)

# Hex colours used for the curves and their labels
color_sequence = [
    '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78',
    '#2ca02c', '#98df8a', '#d62728', '#ff9896',
    '#9467bd', '#c5b0d5', '#8c564b', '#c49c94',
    '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7',
    '#bcbd22', '#dbdb8d', '#17becf', '#9edae5',
]

# Vertical nudges (in units of the 'conferred' axis) applied to the degree
# labels; degrees not listed here are left where they are.
y_offsets = {
    'Foreign Languages': 0.5,
    'English': -0.5,
    'Communications and Journalism': 0.75,
    'Art and Performance': -0.25,
    'Agriculture': 1.25,
    'Social Sciences and History': 0.25,
    'Business': -0.75,
    'Math and Statistics': 0.75,
    'Architecture': -0.75,
    'Computer Science': 0.75,
    'Engineering': -0.25,
}

# Use pd.melt to unpivot the per-degree columns into long form: one row per
# (Year, Degree) pair with the value stored in 'conferred'. read_csv already
# returned a DataFrame, so it is passed to melt directly.
df = pd.melt(gender_degree_data, id_vars='Year', var_name='Degree',
             value_name='conferred')
# Normalise the degree names: underscores become spaces, then title-case
# (note that title-casing also capitalises short words such as 'and').
df['Degree'] = df['Degree'].str.replace('_', ' ', regex=False).str.title()

# Define a formatter that works for both bokeh and matplotlib
def percent_format(x):
    """Format a value as a whole-number percentage string, e.g. ``'42%'``.

    Parameters
    ----------
    x : number-like
        Value to format.

    Returns
    -------
    str
        ``x`` rounded to zero decimal places, followed by a percent sign.
    """
    try:
        return '{:0.0f}%'.format(x)
    except (TypeError, ValueError):
        # Fallback for values that reject the 'f' format spec but can still
        # be converted to an integer. The literal percent sign must be
        # escaped as '%%' in %-style formatting; a lone trailing '%' raises
        # "ValueError: incomplete format".
        return '%d%%' % x

# Value dimension for the plotted quantity: fixed 0-90 range, with values
# rendered through percent_format
vdim = hv.Dimension(
    'conferred',
    range=(0, 90),
    value_format=percent_format,
)

# Wrap the long-form frame in a Dataset, then build one Curve over 'Year'
# per 'Degree' and overlay them
ds = hv.Dataset(df, vdims=vdim)
curves = ds.to(hv.Curve, 'Year', groupby='Degree').overlay()

# Text annotations: the labels are built from the rows of the last year in the data
max_year = ds['Year'].max()
def offset(row):
    """Return a copy of ``row`` with 'conferred' shifted by its label offset.

    The offset is looked up in ``y_offsets`` by degree name, ignoring case.
    The 'Degree' column is title-cased when it is built, which turns 'and'
    into 'And', so an exact-match lookup would silently miss every
    ``y_offsets`` key that contains a lowercase 'and'. Degrees with no entry
    get an offset of 0.

    The incoming row is not modified: pandas does not support functions
    passed to ``DataFrame.apply`` mutating the object they are given.

    Parameters
    ----------
    row : pandas.Series
        A row with at least 'Degree' and 'conferred' entries.

    Returns
    -------
    pandas.Series
        A new row with the adjusted 'conferred' value.
    """
    wanted = str(row['Degree']).casefold()
    delta = next(
        (dy for name, dy in y_offsets.items() if name.casefold() == wanted),
        0,
    )
    shifted = row.copy()
    shifted['conferred'] = row['conferred'] + delta
    return shifted
# Keep only the rows of the final year, nudge each one with offset(), and
# turn the result into text labels positioned at (Year, conferred)
label_df = df.loc[df['Year'] == max_year].apply(offset, axis=1)
labels = hv.Labels(label_df, kdims=['Year', 'conferred'], vdims='Degree')

Display in matplotlib

In [3]:
def cb(plot, element):
    """Plot hook: draw a custom y-axis grid and hide two axis spines.

    Turns on a thin, dashed, semi-transparent black grid for the major
    ticks of the y-axis, then hides the bottom and left spines of the
    axes found in ``plot.handles['axis']``. ``element`` is not used.
    """
    axis = plot.handles['axis']
    grid_style = dict(ls='--', lw=.5, c='k', alpha=.3)
    axis.grid(True, which='major', axis='y', **grid_style)
    for side in ('bottom', 'left'):
        axis.spines[side].set_visible(False)

# Options for the per-degree curves
curve_opts = opts.Curve(
    aspect=0.7,
    bgcolor='white',
    color=hv.Cycle(values=color_sequence),
    fig_size=350,
    hooks=[cb],
    labelled=[],
    linewidth=2,
    show_frame=False,
    show_grid=False,
    show_legend=False,
    title=title,
    xlim=(1970, 2011),
    xticks=5,
)

# Options for the degree labels, coloured by 'Degree' from the same palette
label_opts = opts.Labels(
    color='Degree',
    cmap=color_sequence,
    horizontalalignment='left',
)

# Last expression of the cell, so the styled overlay is what gets displayed
(curves * labels).opts(curve_opts, label_opts)
Out[3]:

Download this notebook from GitHub (right-click to download).