DEV Community

João Lucas Hernandes Detogni
João Lucas Hernandes Detogni

Posted on

Parallel Coordinates function

Hi there, this is my first post here, and I would like to introduce a function that draws a parallel coordinates plot.

def _Paralel_plot(
    dataframe: DataFrame,
    cols: list[str],
    axes_names: list | None = None,
    cmap_style: str | None = None,
    alpha: float | None = None,
    figsize: tuple | None = None,
    invert_axes: list[int] | None = None,
    savePath: str | None = None,
) -> Figure | None:
    """Draw a parallel coordinates plot with smooth curves and a colorbar.

    Every column in ``cols`` becomes one vertical axis. Columns of dtype
    ``object`` are label-encoded and shown as categorical axes. The last
    column always drives the line colors and is drawn as the colorbar.

    Args:
        dataframe: pandas DataFrame holding the data.
        cols: DataFrame columns to plot, in axis order (at least two).
        axes_names: optional display names replacing ``cols`` on the x axis.
        cmap_style: matplotlib colormap name. Default = None, viridis.
        alpha: line opacity. Default = None, 0.8.
        figsize: figure size. Default = None, (10, 4).
        invert_axes: list with axis ids (integers) to invert. Not implemented
            yet (coming soon); the value is currently ignored.
        savePath: when given, the figure is written to this path.

    Returns:
        The created matplotlib figure.

    Raises:
        ValueError: if fewer than two columns are requested.

    BASE CODE CREDITS:
        JohanC (user:12046409), found at
        https://stackoverflow.com/questions/8230638/parallel-coordinates-plot-in-matplotlib

    INTRODUCES:
        - categorical axes
        - cmap colors and cbar
    """
    if len(cols) < 2:
        # One axis cannot be spread over [0, n_cols - 1] (division by zero).
        raise ValueError("a parallel coordinates plot needs at least two columns")

    data = dataframe[cols].copy()

    # Convert categoricals to numerics and keep each encoder so the class
    # names can be restored as tick labels later.
    encoders = {}
    for name in cols:
        if data[name].dtype == 'object':
            label_encoder = LabelEncoder()
            data[name] = label_encoder.fit_transform(data[name])
            encoders[name] = label_encoder

    # JohanC starts here
    ys = data[cols].values
    n_rows, n_cols = ys.shape

    # Adjust the axes: scale every column to [0, 1] so all axes share one
    # vertical range.
    scaler = MinMaxScaler(feature_range=(0, 1))
    zs = scaler.fit_transform(ys)

    # Create the figure and the host axes.
    fig, host = plt.subplots(figsize=figsize or (10, 4))

    # The colorbar is always the last column.
    cbar_col = zs[:, -1]
    norm = mcolors.Normalize(vmin=cbar_col.min(), vmax=cbar_col.max())
    cmap = get_cmap(cmap_style or 'viridis')
    cbar = plt.colorbar(ScalarMappable(norm, cmap), ax=host, pad=0)

    # Axes creation: host, one twin per inner column, then the colorbar axes.
    axes = [host] + [host.twinx() for _ in range(n_cols - 2)] + [cbar.ax]

    for i, ax in enumerate(axes):
        ax.set_ylim(-0.05, 1.05)
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)

        if ax != host:
            ax.spines['left'].set_visible(False)
            ax.yaxis.set_ticks_position('right')
            # Place the axis spine at its column's fractional x position.
            ax.spines["right"].set_position(("axes", i / (n_cols - 1)))

    # Define the y ticks. Tick positions live in the scaled [0, 1] space;
    # only the labels show original data values.
    for i, (name, ax) in enumerate(zip(cols, axes)):
        if name in encoders:
            classes = encoders[name].classes_
            # Same affine map MinMaxScaler applied to the integer codes, so
            # each label sits exactly where that category's lines pass.
            positions = np.arange(len(classes)) * scaler.scale_[i] + scaler.min_[i]
            ax.set_yticks(positions, classes)
        else:
            lower, upper = ax.get_ylim()
            ticks = np.asarray(ax.get_yticks())
            # Drop ticks outside the view: fixing them would expand the limits.
            ticks = ticks[(ticks >= lower) & (ticks <= upper)]
            # Per-column inverse of the MinMaxScaler transform.
            labels = (ticks - scaler.min_[i]) / scaler.scale_[i]
            ax.set_yticks(ticks)
            ax.set_yticklabels(np.round(labels, 2))

    # Define the x ticks.
    host.set_xlim(0, n_cols - 1)
    host.set_xticks(range(n_cols), axes_names or cols)

    host.tick_params(axis='x', which='major', pad=7)
    host.spines['right'].set_visible(False)
    host.xaxis.tick_top()

    # Generation of smoothed lines (see JohanC). The x control points depend
    # on the number of columns, not rows, and are the same for every row.
    curve_xs = np.linspace(0, n_cols - 1, n_cols * 3 - 2, endpoint=True)
    line_alpha = 0.8 if alpha is None else alpha  # keep an explicit alpha=0

    for row in range(n_rows):
        # Repeat each value three times so every Bezier segment leaves and
        # enters its axis horizontally.
        curve_ys = np.repeat(zs[row, :], 3)[1:-1]
        verts = list(zip(curve_xs, curve_ys))

        color = cmap(norm(cbar_col[row]), alpha=line_alpha)  # cbar color

        codes = [Path.MOVETO] + [Path.CURVE4] * (len(verts) - 1)
        patch = patches.PathPatch(
            Path(verts, codes), facecolor='none', lw=1, edgecolor=color
        )
        host.add_patch(patch)

    plt.tight_layout()

    if savePath is not None:
        fig.savefig(savePath)

    return fig

This plot allows for both categorical and numerical axes. It's similar to a Weights and Biases plot (https://wandb.ai/site), featuring smooth curves, with the colorbar axis indicating the metric. Many ideas can be developed within this script, such as an axis inversion subroutine, y-axis label customization, or the removal of transparent squares at the colorbar limits (which is highly desirable).
I would like to give credit to JohanC (user:12046409), who provided the code base in this post: https://stackoverflow.com/questions/8230638/parallel-coordinates-plot-in-matplotlib. Additionally, I hope that this script I am sharing can be further improved.

Top comments (1)

Collapse
 
liso1201 profile image
João Lucas Hernandes Detogni

def _Paralel_plot(
    dataframe: DataFrame,
    cols: list[str],
    axes_names: list | None = None,
    cmap_style: str | None = None,
    alpha: float | None = None,
    figsize: tuple | None = None,
    invert_axes: list[int] | None = None,
    savePath: str | None = None,
) -> Figure | None:
    """Draw a parallel coordinates plot with smooth curves and a colorbar.

    Every column in ``cols`` becomes one vertical axis. Columns of dtype
    ``object`` are label-encoded and shown as categorical axes. The last
    column always drives the line colors and is drawn as the colorbar.

    Args:
        dataframe: pandas DataFrame holding the data.
        cols: DataFrame columns to plot, in axis order (at least two).
        axes_names: optional display names replacing ``cols`` on the x axis.
        cmap_style: matplotlib colormap name. Default = None, viridis.
        alpha: line opacity. Default = None, 0.8.
        figsize: figure size. Default = None, (10, 4).
        invert_axes: list with axis ids (integers) to invert. Not implemented
            yet (coming soon); the value is currently ignored.
        savePath: when given, the figure is written to this path.

    Returns:
        The created matplotlib figure.

    Raises:
        ValueError: if fewer than two columns are requested.

    BASE CODE CREDITS:
        JohanC (user:12046409), found at
        https://stackoverflow.com/questions/8230638/parallel-coordinates-plot-in-matplotlib

    INTRODUCES:
        - categorical axes
        - cmap colors and cbar
    """
    if len(cols) < 2:
        # One axis cannot be spread over [0, n_cols - 1] (division by zero).
        raise ValueError("a parallel coordinates plot needs at least two columns")

    data = dataframe[cols].copy()

    # Convert categoricals to numerics and keep each encoder so the class
    # names can be restored as tick labels later.
    encoders = {}
    for name in cols:
        if data[name].dtype == 'object':
            label_encoder = LabelEncoder()
            data[name] = label_encoder.fit_transform(data[name])
            encoders[name] = label_encoder

    # JohanC starts here
    ys = data[cols].values
    n_rows, n_cols = ys.shape

    # Adjust the axes: scale every column to [0, 1] so all axes share one
    # vertical range.
    scaler = MinMaxScaler(feature_range=(0, 1))
    zs = scaler.fit_transform(ys)

    # Create the figure and the host axes.
    fig, host = plt.subplots(figsize=figsize or (10, 4))

    # The colorbar is always the last column.
    cbar_col = zs[:, -1]
    norm = mcolors.Normalize(vmin=cbar_col.min(), vmax=cbar_col.max())
    cmap = get_cmap(cmap_style or 'viridis')
    cbar = plt.colorbar(ScalarMappable(norm, cmap), ax=host, pad=0)

    # Axes creation: host, one twin per inner column, then the colorbar axes.
    axes = [host] + [host.twinx() for _ in range(n_cols - 2)] + [cbar.ax]

    for i, ax in enumerate(axes):
        ax.set_ylim(-0.05, 1.05)
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)

        if ax != host:
            ax.spines['left'].set_visible(False)
            ax.yaxis.set_ticks_position('right')
            # Place the axis spine at its column's fractional x position.
            ax.spines["right"].set_position(("axes", i / (n_cols - 1)))

    # Define the y ticks. Tick positions live in the scaled [0, 1] space;
    # only the labels show original data values.
    for i, (name, ax) in enumerate(zip(cols, axes)):
        if name in encoders:
            classes = encoders[name].classes_
            # Same affine map MinMaxScaler applied to the integer codes, so
            # each label sits exactly where that category's lines pass.
            positions = np.arange(len(classes)) * scaler.scale_[i] + scaler.min_[i]
            ax.set_yticks(positions, classes)
        else:
            lower, upper = ax.get_ylim()
            ticks = np.asarray(ax.get_yticks())
            # Drop ticks outside the view: fixing them would expand the limits.
            ticks = ticks[(ticks >= lower) & (ticks <= upper)]
            # Per-column inverse of the MinMaxScaler transform.
            labels = (ticks - scaler.min_[i]) / scaler.scale_[i]
            ax.set_yticks(ticks)
            ax.set_yticklabels(np.round(labels, 2))

    # Define the x ticks.
    host.set_xlim(0, n_cols - 1)
    host.set_xticks(range(n_cols), axes_names or cols)

    host.tick_params(axis='x', which='major', pad=7)
    host.spines['right'].set_visible(False)
    host.xaxis.tick_top()

    # Generation of smoothed lines (see JohanC). The x control points depend
    # on the number of columns, not rows, and are the same for every row.
    curve_xs = np.linspace(0, n_cols - 1, n_cols * 3 - 2, endpoint=True)
    line_alpha = 0.8 if alpha is None else alpha  # keep an explicit alpha=0

    for row in range(n_rows):
        # Repeat each value three times so every Bezier segment leaves and
        # enters its axis horizontally.
        curve_ys = np.repeat(zs[row, :], 3)[1:-1]
        verts = list(zip(curve_xs, curve_ys))

        color = cmap(norm(cbar_col[row]), alpha=line_alpha)  # cbar color

        codes = [Path.MOVETO] + [Path.CURVE4] * (len(verts) - 1)
        patch = patches.PathPatch(
            Path(verts, codes), facecolor='none', lw=1, edgecolor=color
        )
        host.add_patch(patch)

    plt.tight_layout()

    if savePath is not None:
        fig.savefig(savePath)

    return fig