Home Bokeh 사용법
Post
Cancel

Bokeh 사용법

Bokeh

특징

  • 깔끔한 Visualization Framework
  • Jupyter Notebook과 궁합이 좋다

Correlation heatmap 그리기

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import bisect
from collections import OrderedDict
from itertools import chain
from math import pi

from bokeh.models import ColorBar, LinearColorMapper
from bokeh.palettes import RdBu as colors  ### just make sure to import a palette that centers on white (-ish)
from bokeh.plotting import figure, show
from numpy import arange

### Placeholder: replace with your own pandas DataFrame before running.
### The plot draws one cell per (row, column) pair and colours it on a -1..1
### scale, so `df` is presumably a square correlation matrix (e.g. the result
### of `some_dataframe.corr()`) -- confirm for your data.
df = YOUR_DATAFRAME_OBJECT

### An odd number of colours guarantees a distinct middle colour for 0 correlation.
colors = list(colors[9])
### Column names become the axis tick labels; convert them to str up front so
### that non-string column names (ints, tuples, ...) are usable as label text.
labels = [str(label) for label in df.columns]
nlabels = len(labels)

def get_bounds(n):
    """Return the edge coordinates of an ``n`` x ``n`` grid of unit squares.

    Cells are listed row by row (row 0 first) and, within a row, from
    column 0 upwards, which matches the order of a row-major flattened
    ``n`` x ``n`` matrix.

    Args:
        n: Number of features, i.e. the number of rows and columns.

    Returns:
        A tuple ``(top, bottom, left, right)`` of four lists, each holding
        ``n * n`` integer coordinates (one entry per cell).
    """
    columns = list(range(n))
    # Column edges repeat once per row: 0..n-1, 0..n-1, ...
    left = columns * n
    right = [edge + 1 for edge in left]
    # Row edges stay constant across a whole row: 0,0,...,1,1,...
    bottom = [row for row in columns for _ in columns]
    top = [edge + 1 for edge in bottom]
    return top, bottom, left, right

def get_colors(corr_array, colors):
    """Map each correlation coefficient to a colour from ``colors``.

    The interval [-1, 1] is split into ``len(colors)`` equal-width bins;
    the first colour covers the most negative values and the last colour
    the most positive ones. A value lying exactly on an interior bin edge
    belongs to the bin below it.

    Args:
        corr_array: Iterable of correlation coefficients.
        colors: Sequence of colours ordered from most negative to most
            positive correlation.

    Returns:
        A list with one entry of ``colors`` per value in ``corr_array``.

    Raises:
        ValueError: If ``colors`` is empty.
    """
    ncolors = len(colors)
    if ncolors == 0:
        raise ValueError("colors must not be empty")
    # Lower edge of every bin, built explicitly so there are always exactly
    # ``ncolors`` edges regardless of floating-point rounding of the step.
    edges = [-1 + 2.0 * i / ncolors for i in range(ncolors)]
    color = []
    for value in corr_array:
        # bisect_left returns 0 for values <= -1; clamp so that -1 maps to the
        # first colour instead of wrapping around to colors[-1].
        # NaN compares false against every edge and therefore also lands in
        # the first bin; filter NaNs beforehand if that matters.
        index = max(bisect.bisect_left(edges, value) - 1, 0)
        color.append(colors[index])
    return color

### Comma-separated list of Bokeh tools enabled in the figure's toolbar.
TOOLS = "hover,crosshair,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select"

### `figure` and `show` are provided by `bokeh.plotting`.
### NOTE(review): newer Bokeh releases appear to spell the size arguments
### `width`/`height`; switch if your version rejects `plot_width`/`plot_height`.
p = figure(plot_width=1278, plot_height=1278,
           x_range=(0, nlabels), y_range=(0, nlabels),
           title="Correlation Coefficient Heatmap (lighter is worse)",
           toolbar_location='right', tools=TOOLS)

### The coloured cells form the grid themselves, so hide the default grid lines
### and tilt the tick labels by 45 degrees on both axes.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.xaxis.major_label_orientation = pi/4
p.yaxis.major_label_orientation = pi/4

### One unit square per matrix cell, coloured by its (row-major flattened) value.
top, bottom, left, right = get_bounds(nlabels)  ### creates squares for plot
color_list = get_colors(df.values.flatten(), colors)

p.quad(top=top, bottom=bottom, left=left,
       right=right, line_color='white',
       color=color_list)

### Set ticks with labels: one tick at the centre of every cell
ticks = [index + 0.5 for index in range(nlabels)]
tick_dict = OrderedDict(zip(ticks, labels))
### Create the correct number of ticks for each axis
p.xaxis.ticker = ticks
p.yaxis.ticker = ticks
### Override the numeric tick values with the column names
p.xaxis.major_label_overrides = tick_dict
p.yaxis.major_label_overrides = tick_dict

### Setup color bar spanning the full correlation range [-1, 1]
mapper = LinearColorMapper(palette=colors, low=-1, high=1)
color_bar = ColorBar(color_mapper=mapper, location=(0, 0))
p.add_layout(color_bar, 'right')

show(p)

참조 링크

Bokeh Reference

Correlation heatmap

This post is licensed under CC BY 4.0 by the author.

-

Python