I am learning Bokeh and trying to setup two-steps filter using Bokeh.
The logic is: (1) select data using widgets, then plot and show table1.
(2) Then using Checkboxgroup (labels will be obtained from the preliminary selection from (1)) to further select, and return table2 and statistic results describe()
for two columns of the further-selected dataset. My codes:
from os.path import dirname, join
import numpy as np
import pandas as pd
from math import pi
from bokeh.io import curdoc
from bokeh.layouts import column, row
from bokeh.models import (ColumnDataSource, Div, Select, Slider, RangeSlider, TextInput, PreText,
CheckboxGroup, SingleIntervalTicker, DataTable, NumberFormatter,
TableColumn)
from bokeh.plotting import figure
# Data cleaning
veg = pd.read_csv("aaaaaaa.csv")
cooking = ['All', 'Raw']
preserve = ['All', 'Fresh']
regions_UN = ["All", "Africa", "Americas", "Asia", "Europe", "Oceania"]
def nix(val, lst):
return [x for x in lst if x != val]
axis_map = {
"Vegetable botanic family": "Botanic family",
"Nitrate level (mg/kg)": "Nitrate_corrected",
"Year": "Publication year",
"Region": "Regions_UN"
}
desc1 = Div(text=open(join(dirname(__file__), "Head.html")).read(), sizing_mode="stretch_width")
desc2 = Div(text=open(join(dirname(__file__), "Instructions.html")).read(), sizing_mode="stretch_width")
# Create Input Widgets
food_name = TextInput(title="Vegetable name contains")
nitrate = RangeSlider(title="Nitrate (mg/kg) range", value=(0, 5000), start=0, end=5000, step=100, format="0,0")
min_year = Slider(title="Publication Year from", start=1980, end=2020, value=1980, step=1)
max_year = Slider(title="Publication Year to", start=1980, end=2020, value=2020, step=1)
cooking_method = Select(title="Cooking Method", value="Raw", options=sorted(cooking))
preserve_method = Select(title="Preservation Method", value="Fresh", options=sorted(preserve))
country = TextInput(title="Regions_UN of the country")
region = Select(title="Broad regions_UN of the country", value="World", options=regions_UN)
x_axis = Select(title="X-Axis", options=sorted(nix("Nitrate level (mg/kg)", axis_map.keys())), value="Year")
y_axis = Select(title="Y-Axis", options=["Nitrate level (mg/kg)"], value="Nitrate level (mg/kg)")
stats1 = PreText(text='', width=500)
stats2 = PreText(text='', width=500)
checkbox_group = CheckboxGroup(labels=[], active=[0], width=200)
# Create Column Data Source that will be used by the plot
source1 = ColumnDataSource(data=dict(x=[], y=[], vegetable=[], family=[],
year=[], cooking_method=[], preserve_method=[],
country=[], region=[], color=[], alpha=[])
)
TOOLTIPS = [
("Vegetable", "@vegetable"),
("Nitrate", "@y"),
("Year", "@year"),
("Country", "@country")
]
# Create plot
p = figure(plot_height=1000, plot_width=1200, title="", tools='pan,wheel_zoom,xbox_select,reset',
toolbar_location="left", tooltips=TOOLTIPS, sizing_mode="scale_both")
p.circle(x="x", y="y", source=source1, size=6, color="color", line_color=None, fill_alpha="alpha")
p.xaxis.major_label_orientation = pi / 4
p.yaxis.major_label_orientation = "vertical"
p.xaxis.ticker = SingleIntervalTicker(interval=1)
p.xaxis.minor_tick_line_color = None # turn off x-axis minor ticks
# Create table
columns1 = [
TableColumn(field="vegetable", title="Common vegetable name"),
TableColumn(field="family", title="Botanic family"),
TableColumn(field="y", title="Nitrate level (mg/kg)", formatter=NumberFormatter(format="0,0.0")),
TableColumn(field="cooking_method", title="Cooking method"),
TableColumn(field="preserve_method", title="Preservation method"),
TableColumn(field="country", title="Country_modified"),
TableColumn(field="region", title="Broad regions_UN"),
TableColumn(field="year", title="Publication year")
]
table1 = DataTable(source=source1, columns=columns1, width=800)
def update_stats(stats, df, a, b):
stats.text = str(df[[a, b]].describe())
def update_dynamic_selection(df):
food_name_val = food_name.value.strip().lower()
if food_name_val != "":
checkbox_group.labels = sorted(df["Common vegetable name"].unique().tolist())
else:
checkbox_group.labels = ['Please select food first']
def select_food():
food_name_val = food_name.value.strip().lower()
cooking_method_val = cooking_method.value
preserve_method_val = preserve_method.value
country_val = country.value.strip().lower()
region_val = region.value
selected = veg[
(veg['Nitrate_corrected'] >= nitrate.value[0]) & (veg['Nitrate_corrected'] <= nitrate.value[1]) &
(veg["Publication year"] >= min_year.value) &
(veg["Publication year"] <= max_year.value)
]
if food_name_val != "":
selected = selected[selected['Column1'].str.contains(food_name_val)]
if cooking_method_val != "All":
selected = selected[selected['Cooking method'] == cooking_method_val]
if preserve_method_val != "All":
selected = selected[selected['Preservation method'] == preserve_method_val]
if country_val != "":
selected = selected[selected['Regions_UN'].str.contains(country_val)]
if region_val != "World":
selected = selected[selected['Broad regions_UN'].str.contains(region_val)]
return selected
def update():
current = select_food()
x_name, y_name = axis_map[x_axis.value], axis_map[y_axis.value]
p.xaxis.axis_label = x_axis.value
p.yaxis.axis_label = y_axis.value
p.title.text = "%d records; %d vegetable selected." % (len(current), current["Common vegetable name"].nunique())
update_stats(stats1, current, x_name, y_name)
source1.data = dict(
x=current[x_name],
y=current[y_name],
vegetable=current["Common vegetable name"],
family=current["Botanic family"],
cooking_method=current["Cooking method"],
preserve_method=current["Preservation method"],
year=current["Publication year"],
country=current["Country of sampling"],
region=current["Broad regions_UN"],
color=current["color"],
alpha=current["alpha"]
)
update_dynamic_selection(current)
controls = [food_name, cooking_method, preserve_method, country, region, min_year, max_year,
nitrate, x_axis, y_axis]
for control in controls:
control.on_change('value', lambda attr, old, new: update())
The fist part can work, but when I add table2 and stats2, it does not work.
list1 = ["Common vegetable name", "Botanic family", "Cooking method", "Preservation method", "Nitrate_corrected", "Publication year", "Country of sampling", "Broad regions_UN"]
veg2 = veg[list1]
source2 = ColumnDataSource()
columns2 = [TableColumn(field=i, title=i) for i in veg2.columns]
table2 = DataTable(source=source2, columns=columns2, width=900)
def checkbox_update():
checkbox_active = [checkbox_group.labels[i] for i in checkbox_group.active]
final = veg2[veg2["Common vegetable name"].isin(checkbox_active)]
update_stats(stats2, final, "Publication year", "Nitrate_corrected")
source2.data = final
checkbox_group.on_change("active", checkbox_update())
widgets = column(*controls, width=320, height=1000)
LAYOUT = column(
desc1,
row(widgets, p), row(table1, stats1),
desc2,
row(checkbox_group, table2, stats2)) # sizing_mode="stretch_both"
update() # initial load of the data
checkbox_update()
curdoc().add_root(LAYOUT)
curdoc().title = "Nitrate in vegetables"```
Aucun commentaire:
Enregistrer un commentaire