Bachelors degrees by gender¶
Download this notebook from GitHub (right-click to download).
URL: http://matplotlib.org/examples/showcase/bachelors_degrees_by_gender.html
Most examples work equivalently across multiple plotting backends; this example is also available for:
In [1]:
import holoviews as hv
from holoviews import opts
# Load the HoloViews plotting extensions named below (bokeh listed first, then matplotlib).
# NOTE(review): presumably the first-listed backend becomes the active default — confirm.
hv.extension('bokeh', 'matplotlib')
Define data¶
This example uses the matplotlib sample data that conda users can fetch using:
conda install -c conda-forge mpl_sample_data
In [2]:
import pandas as pd
from matplotlib.cbook import get_sample_data
# Resolve the bundled sample CSV to a filesystem path instead of an open file
# object (asfileobj=False), so no file handle is left unclosed after reading.
fname = get_sample_data('percent_bachelors_degrees_women_usa.csv',
                        asfileobj=False)
gender_degree_data = pd.read_csv(fname)

# Plot title; the trailing newline is part of the string.
title = ('Percentage of Bachelor\'s degrees conferred to women in '
         'the U.S.A. by major (1970-2011)\n')
# Colors used for the plot, listed two per line (20 hex values in total)
color_sequence = [
    '#1f77b4', '#aec7e8',
    '#ff7f0e', '#ffbb78',
    '#2ca02c', '#98df8a',
    '#d62728', '#ff9896',
    '#9467bd', '#c5b0d5',
    '#8c564b', '#c49c94',
    '#e377c2', '#f7b6d2',
    '#7f7f7f', '#c7c7c7',
    '#bcbd22', '#dbdb8d',
    '#17becf', '#9edae5',
]

# Vertical offsets applied to individual degree labels, keyed by degree name.
# Degrees that are not listed here get no offset.
y_offsets = {
    'Foreign Languages': 0.5,
    'English': -0.5,
    'Communications and Journalism': 0.75,
    'Art and Performance': -0.25,
    'Agriculture': 1.25,
    'Social Sciences and History': 0.25,
    'Business': -0.75,
    'Math and Statistics': 0.75,
    'Architecture': -0.75,
    'Computer Science': 0.75,
    'Engineering': -0.25,
}
# Load the data into a dataframe and use pd.melt to unpivot it: 'Year' stays
# as the identifier, every other column is stacked into 'Degree'/'conferred'.
df = pd.melt(
    pd.DataFrame(gender_degree_data),
    id_vars='Year',
    var_name='Degree',
    value_name='conferred',
)
# Turn the column-style names into display labels: underscores become spaces,
# then each word is title-cased.
df['Degree'] = df['Degree'].map(lambda name: name.replace('_', ' ').title())
# Define a formatter that works for both bokeh and matplotlib
def percent_format(x):
    """Format a number as a whole-number percentage string such as ``'42%'``.

    Parameters
    ----------
    x : number-like
        Value to format. Values that support the ``'0.0f'`` format spec are
        rounded to zero decimals; anything else falls back to integer
        conversion via ``%d``.

    Returns
    -------
    str
        The formatted value followed by a literal percent sign.

    Raises
    ------
    TypeError
        If ``x`` supports neither float formatting nor integer conversion.
    """
    try:
        return '{:0.0f}%'.format(x)
    except (TypeError, ValueError):
        # '%%' is the escaped literal percent sign. A lone trailing '%' is an
        # incomplete format specifier and raises ValueError on every call.
        return '%d%%' % x
# Value dimension for the plotted quantity, with its range fixed to 0-90
vdim = hv.Dimension('conferred', range=(0, 90))

# Wrap the long-form dataframe as a HoloViews dataset with that value dimension
ds = hv.Dataset(df, vdims=vdim)

# One Curve over 'Year' per 'Degree', overlaid, with the Year range set to
# 1970-2030.
# NOTE(review): the range extends past the data, presumably to leave room for
# the text labels — confirm.
curves = (
    ds.to(hv.Curve, 'Year', groupby='Degree')
    .overlay()
    .redim(Year={'range': (1970, 2030)})
)

# Last year present in the data; the label annotations are anchored at this year
max_year = ds['Year'].max()
def offset(row):
    """Return a copy of ``row`` with its 'conferred' value shifted for labelling.

    The shift is looked up in ``y_offsets`` by the row's ``Degree``; degrees
    without an entry are shifted by 0.

    Parameters
    ----------
    row : pandas.Series
        A single row providing ``Degree`` and ``'conferred'`` entries.

    Returns
    -------
    pandas.Series
        A new row with the adjusted ``'conferred'`` value; the input row is
        left untouched.
    """
    # Work on a copy: pandas does not support functions that mutate the
    # object handed to them by DataFrame.apply.
    shifted = row.copy()
    shifted['conferred'] = shifted['conferred'] + y_offsets.get(shifted.Degree, 0)
    return shifted
# Select the rows for the final year and apply ``offset`` to each of them
label_df = df.loc[df.Year == max_year].apply(offset, axis=1)
# Text labels positioned at (Year, conferred) and showing the Degree name
labels = hv.Labels(label_df, kdims=['Year', 'conferred'], vdims='Degree')
Display¶
In [3]:
# Combine the curves with their text labels and attach the title
overlay = (curves * labels).relabel(title)

# Grid line styling passed to the NdOverlay options below
gridstyle = dict(grid_line_dash=[6, 4], grid_line_width=3, grid_bounds=(0, 100))

# Keep this call as the last expression so the styled overlay is the cell output
overlay.opts(
    opts.Curve(
        color=hv.Cycle(values=color_sequence),
        line_width=2,
        height=900,
        width=900,
        xticks=[1970, 1980, 1990, 2000, 2010],
        yformatter='%d%%',
        labelled=[],
        show_frame=False,
        show_grid=True,
        show_legend=False,
        tools=['hover'],
    ),
    opts.Labels(
        cmap=color_sequence,
        text_color='Degree',
        text_align='left',
    ),
    opts.NdOverlay(
        batched=False,
        gridstyle=gridstyle,
    ),
)
Out[3]:
Download this notebook from GitHub (right-click to download).