如何在 Python Dash 中结合数据表过滤报告列均值

How to report column means with datatable filtering Python Dash

我正在使用 Python Dash 构建一个应用程序,在其中显示一个 Dash DataTable,并在它下方用另一个 Dash DataTable 报告该数据表(底层的 pandas 数据框)各列的平均值。到目前为止,我已经尝试过:


import dash
from dash.dependencies import Input, Output
import dash_table
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import json
import numpy as np  # needed for np.array below; absent from the imports above

# Small all-float demo frame that backs the main DataTable.
wide_data = pd.DataFrame(np.array([[1.24, 2.34, 3.234], [4.24, .45, .06], [7, 8, 9]]),
                         columns=['a', 'b', 'c'])

df = pd.DataFrame(wide_data)

# Unfiltered column means, transposed to a single row so that
# to_dict('records') produces exactly one record for the means table.
df_floats = df.select_dtypes(include=['float64'])
df_floats_means = pd.DataFrame(df_floats.mean(numeric_only=True))
df_floats_means_T = df_floats_means.T


app = dash.Dash(__name__)

# Page layout, top to bottom: heading and filter-query widgets, the main
# filterable table, then a second table that holds the column means.
app.layout = html.Div([
    html.H1('Customer Complaints Dashboard'),
    html.Label('Read and Writing Queries Filtering'),
    #     html.Label2('If "Read filter_query" is chosen, use operators (le, ge, <, <=, >, >=) + tag value to filter'),
    #     html.Label3('If "Write filter_query" is chosen, ___'),

    dcc.RadioItems(
        id='filter-query-read-write',
        options=[
            {'label': 'Read filter_query', 'value': 'read'},
        ],
        value='read'
    ),

    html.Br(),

    # this is for write functionality
    dcc.Input(id='filter-query-input', placeholder='Enter filter query'),

    # this is associated with read screen
    html.Div(id='filter-query-output'),

    html.Hr(),

    # Main table: one column per DataFrame column, filtered in the browser.
    dash_table.DataTable(
        id='datatable-advanced-filtering',
        columns=[
            {'name': i, 'id': i, 'deletable': True} for i in df.columns
            # omit the id column
            if i != 'id'
        ],

        data=df.to_dict('records'),
        editable=True,
        page_action='native',
        page_size=10,
        filter_action="native",
        fixed_columns={'headers': True, 'data': 1},
        style_table={'minWidth': '100%'}
    ),

    html.Hr(),
    # Style keys are camelCased CSS property names; the previous all-lowercase
    # 'whitespace' key is not a CSS property and had no effect.
    html.Div(id='datatable-query-structure', style={'whiteSpace': 'pre'}),
    html.Hr(),
    html.Label('Dataframe Means'),

    # Means table: seeded with the means computed at import time.
    dash_table.DataTable(
        id='datatable-advanced-filtering2',
        columns=[
            {'name': i, 'id': i, 'deletable': True} for i in df_floats_means_T.columns
            # omit the id column
            if i != 'id'
        ],

        data=df_floats_means_T.to_dict('records'),
        editable=True,
        page_action='native',
        page_size=10,
        #   filter_action="native",
        fixed_columns={'headers': True, 'data': 1},
        style_table={'minWidth': '100%'}
    ),


    html.Hr(),
    html.Div(id='datatable-query-structure2', style={'whiteSpace': 'pre'}),
    html.Hr(),



])


@app.callback(
    [Output('filter-query-input', 'style'),
     Output('filter-query-output', 'style')],
    [Input('filter-query-read-write', 'value')]
)
def query_input_output(val):
    """Pick which filter-query widget is visible for the selected mode.

    Args:
        val: current value of the 'filter-query-read-write' radio group.

    Returns:
        (input_style, output_style). For 'read' the input box is hidden and
        the output div is shown; for any other value it is the reverse.
    """
    reading = val == 'read'
    input_style = {
        'width': '100%',
        'display': 'none' if reading else 'inline-block',
    }
    output_style = {'display': 'inline-block' if reading else 'none'}
    return input_style, output_style


@app.callback(
    Output('filter-query-output', 'children'),
    [Input('datatable-advanced-filtering', 'filter_query')]
)
def read_query(query):
    """Render the main table's current filter_query for display.

    Returns the plain text "No filter query" when `query` is None, otherwise
    a Markdown component showing the query as inline code.
    """
    if query is not None:
        return dcc.Markdown('`filter_query = "{}"`'.format(query))
    return "No filter query"

@app.callback(
    Output('datatable-advanced-filtering2', 'data'),
    [Input('datatable-advanced-filtering', 'data')]
)

def update_means(data):
    # Callback wired from the main table's `data` prop to the means table's
    # `data` prop.
    # NOTE(review): `data` is never read below -- the means are recomputed
    # from the `df` defined outside this function, so the result does not
    # depend on what the table currently holds.
    dff = df
    df_floats = dff.select_dtypes(include=['float64'])
    df_floats_means = df_floats.mean(numeric_only=True)
    # Series -> one-column frame -> transpose to a single row of means.
    df_floats_means = pd.DataFrame(df_floats_means)
    df_floats_means_T = df_floats_means.T

    converted_means = df_floats_means_T.to_dict('records')
    # NOTE(review): to_dict('records') already yields a list of records; the
    # extra brackets here wrap that list inside another list.
    return([converted_means])

# Start the app's server only when this file is executed as a script.
if __name__ == '__main__':
    app.run_server(threaded=True)

但我知道这是不对的,因为在应用程序布局中 id='datatable-advanced-filtering2' 的这一部分,均值是在进行任何过滤之前计算的。大家能帮我写一下这个回调吗?

我 100% 是 dash 的新手,而且我还没有看到很多关于如何正确执行此操作的指南。任何帮助将不胜感激。谢谢。

我相信您可以使用主 DataTable 的 data 属性作为回调函数的输入,并将输出设为均值 DataTable 的 data 属性。其结构可能类似于:

@app.callback(
    Output('datatable-advanced-filtering2', 'data'),
    [Input('datatable-advanced-filtering', 'data')]
)
def update_means(data):
    # Outline only -- each step below still has to be written:
    # 1. Use `data` (the main table's current rows) to construct a dataframe
    # 2. Find the means using the same method used at the top of the post
    # 3. Convert the means dataframe using .to_dict('records')
    # 4. Return the converted records

文档对过滤有这样的说法(强调我的):

By default, these transformations are done clientside. Your Dash callbacks can respond to these modifications by listening to the data property as an Input.

您可以在此处阅读更多内容:https://dash.plotly.com/datatable/interactivity

编辑

所以经过一些实验,我意识到当 filter_action 设置为 'native' 时,DataTable 的过滤实际上并没有更新它的 data 属性。因为该属性实际上并未更新,所以使用 data 属性作为 Input 的任何回调都不会在过滤时触发。为了解决这个问题并使用 data 属性作为我们回调的输入,我们需要将 filter_action 设置为 'custom',然后我们自己实现过滤回调。幸运的是,Dash 文档在这里有一个如何执行此操作的示例:https://dash.plotly.com/datatable/filtering

使用这些文档中的信息,我能够得到一个可以工作的示例。我将留给您来弄清楚过滤方法的内部工作原理,但以下代码在本地为我运行,并显示了在过滤主 DataTable 时均值 DataTable 更新的方式。我所做的唯一其他修改是对 update_means 函数:它不再把结果额外包在一个 list 里,而是直接返回 to_dict('records') 得到的记录,这正是 data 属性所期望的格式。

import dash
from dash.dependencies import Input, Output
import dash_table
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import json
import numpy as np
from dash import Dash

# Demo data for the main table, plus the unfiltered column means that seed
# the means table before any callback has run.
wide_data = pd.DataFrame(
    np.array([[1.24, 2.34, 3.234], [4.24, .45, .06], [7, 8, 9]]),
    columns=['a', 'b', 'c'],
)
df = pd.DataFrame(wide_data)
df_floats = df.select_dtypes(include=['float64'])
# Series of means -> one-column frame -> single-row frame.
df_floats_means = pd.DataFrame(df_floats.mean(numeric_only=True))
df_floats_means_T = df_floats_means.transpose()

# Operators used by custom filtering.
# Each inner list holds the accepted spellings of one operator; the first
# spelling (with its trailing space stripped) is the name split_filter_part
# reports. Lists are scanned top to bottom and the first spelling found in
# the expression wins, so '>=', '<=' and '!=' sit above '>', '<' and '='.
operators = [['ge ', '>='],
             ['le ', '<='],
             ['lt ', '<'],
             ['gt ', '>'],
             ['ne ', '!='],
             ['eq ', '='],
             ['contains '],
             ['datestartswith ']]

# Initialize app
app = Dash(__name__)

# Init layout: heading and filter-query widgets, the main table (filtered by
# a server-side callback), then a second table that holds the column means.
app.layout = html.Div([
    html.H1('Customer Complaints Dashboard'),
    html.Label('Read and Writing Queries Filtering'),
    #     html.Label2('If "Read filter_query" is chosen, use operators (le, ge, <, <=, >, >=) + tag value to filter'),
    #     html.Label3('If "Write filter_query" is chosen, ___'),

    dcc.RadioItems(
        id='filter-query-read-write',
        options=[
            {'label': 'Read filter_query', 'value': 'read'},
        ],
        value='read'
    ),

    html.Br(),

    # this is for write functionality
    dcc.Input(id='filter-query-input', placeholder='Enter filter query'),

    html.Div(id='filter-query-output'),  # this is associated with read screen

    html.Hr(),

    # filter_action="custom": the table only reports filter_query; a callback
    # must apply the filter and write the surviving rows back to `data`.
    dash_table.DataTable(
        id='datatable-advanced-filtering',
        columns=[
            {'name': i, 'id': i, 'deletable': True} for i in df.columns
            # omit the id column
            if i != 'id'
        ],

        data=df.to_dict('records'),
        editable=True,
        page_action='native',
        page_size=10,
        filter_action="custom",
        fixed_columns={'headers': True, 'data': 1},
        style_table={'minWidth': '100%'}
    ),

    html.Hr(),
    # Style keys are camelCased CSS property names; the previous all-lowercase
    # 'whitespace' key is not a CSS property and had no effect.
    html.Div(id='datatable-query-structure', style={'whiteSpace': 'pre'}),
    html.Hr(),
    html.Label('Dataframe Means'),

    # Means table: seeded with the means computed at import time.
    dash_table.DataTable(
        id='datatable-advanced-filtering2',
        columns=[
            {'name': i, 'id': i, 'deletable': True} for i in df_floats_means_T.columns
            # omit the id column
            if i != 'id'
        ],

        data=df_floats_means_T.to_dict('records'),
        editable=True,
        page_action='native',
        page_size=10,
        filter_action="native",
        fixed_columns={'headers': True, 'data': 1},
        style_table={'minWidth': '100%'}
    ),


    html.Hr(),
    html.Div(id='datatable-query-structure2', style={'whiteSpace': 'pre'}),
    html.Hr(),



])


@app.callback(
    [Output('filter-query-input', 'style'),
     Output('filter-query-output', 'style')],
    [Input('filter-query-read-write', 'value')]
)
def query_input_output(val):
    """Pick which filter-query widget is visible for the selected mode.

    Args:
        val: current value of the 'filter-query-read-write' radio group.

    Returns:
        (input_style, output_style). For 'read' the input box is hidden and
        the output div is shown; for any other value it is the reverse.
    """
    reading = val == 'read'
    input_style = {
        'width': '100%',
        'display': 'none' if reading else 'inline-block',
    }
    output_style = {'display': 'inline-block' if reading else 'none'}
    return input_style, output_style


@app.callback(
    Output('filter-query-output', 'children'),
    [Input('datatable-advanced-filtering', 'filter_query')]
)
def read_query(query):
    """Render the main table's current filter_query for display.

    Returns the plain text "No filter query" when `query` is None, otherwise
    a Markdown component showing the query as inline code.
    """
    if query is not None:
        return dcc.Markdown('`filter_query = "{}"`'.format(query))
    return "No filter query"


# Callback to re-calculate means after filtering is applied
@app.callback(
    Output('datatable-advanced-filtering2', 'data'),
    [Input('datatable-advanced-filtering', 'data')]
)
def update_means(data):
    '''Recompute the column means from the rows currently in the top table.

    Args:
        data: the top DataTable's ``data`` prop (a list of row dicts).

    Returns:
        A list holding one record that maps each numeric column to its mean,
        i.e. the list-of-dicts shape produced by ``to_dict('records')``.
    '''
    dff = pd.DataFrame.from_dict(data)

    # Select every numeric dtype rather than float64 only: whole-number
    # values such as 7.0 can come back from the browser as ints after the
    # JSON round trip, and a column left with only such values is inferred
    # as int64 -- with a float64-only selection it would silently vanish
    # from the means.
    numeric_cols = dff.select_dtypes(include='number')

    # Series of means -> one-column frame -> single-row frame.
    means_row = pd.DataFrame(numeric_cols.mean(numeric_only=True)).T

    # to_dict('records') yields a list of dicts (one per row), not a dict.
    return means_row.to_dict('records')


def split_filter_part(filter_part):
    '''Helper function for custom filtering.

    Splits one filter expression such as ``{a} ge 3`` into its pieces.

    Args:
        filter_part: a single expression from the table's filter_query
            (the query already split on ' && ').

    Returns:
        ``(column_name, operator_name, value)`` where operator_name is the
        first spelling of the matched entry in ``operators`` with its
        trailing space removed, and value is a float when the text parses
        as one, otherwise a string (surrounding quotes removed).
        ``[None, None, None]`` when no known operator occurs in the text.
    '''
    for operator_type in operators:
        for operator in operator_type:
            if operator in filter_part:
                name_part, value_part = filter_part.split(operator, 1)
                # Column name is whatever sits between the outermost braces.
                name = name_part[name_part.find('{') + 1: name_part.rfind('}')]

                value_part = value_part.strip()
                if not value_part:
                    # Nothing after the operator: avoid indexing an empty
                    # string and report an empty value.
                    value = value_part
                else:
                    v0 = value_part[0]
                    if (v0 == value_part[-1] and v0 in ("'", '"', '`')):
                        # Quoted value: drop the quotes and un-escape any
                        # backslash-escaped quote characters inside.
                        value = value_part[1: -1].replace('\\' + v0, v0)
                    else:
                        try:
                            value = float(value_part)
                        except ValueError:
                            value = value_part

                # word operators need spaces after them in the filter string,
                # but we don't want these later
                return name, operator_type[0].strip(), value

    return [None] * 3


def _filter_text(value):
    '''Render a parsed filter value as text for substring/prefix matching.'''
    # split_filter_part turns unquoted numbers into floats; show whole
    # numbers without the trailing ".0" so that 7 is matched as "7".
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


@app.callback(
    Output('datatable-advanced-filtering', "data"),
    [Input('datatable-advanced-filtering', "filter_query")])
def update_table(filter):
    '''Callback that handles custom filtering of top datatable.

    Args:
        filter: the table's filter_query string; sub-expressions are joined
            by ' && '. None or an empty string means "no filter".

    Returns:
        The rows of ``df`` that satisfy every sub-expression, as a list of
        record dicts.
    '''
    if not filter:
        return df.to_dict('records')

    dff = df
    for filter_part in filter.split(' && '):
        col_name, operator, filter_value = split_filter_part(filter_part)

        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
        elif operator == 'contains':
            # Compare as text: the .str accessor raises on numeric columns
            # and rejects a float pattern.
            as_text = dff[col_name].astype(str)
            dff = dff.loc[as_text.str.contains(_filter_text(filter_value))]
        elif operator == 'datestartswith':
            # this is a simplification of the front-end filtering logic,
            # only works with complete fields in standard format
            as_text = dff[col_name].astype(str)
            dff = dff.loc[as_text.str.startswith(_filter_text(filter_value))]

    return dff.to_dict('records')


# Start the app's server only when this file is executed as a script.
if __name__ == '__main__':
    app.run_server(threaded=True)

@ncascale 非常感谢,但这里有一个小问题:

def update_table(filter):
    '''Callback that handles custom filtering of top datatable'''
    # NOTE(review): `filter` is used here without a None check, so a None
    # argument raises AttributeError on .split.
    filtering_expressions = filter.split(' && ')
    # Start from the `df` defined outside this function and narrow it one
    # expression at a time.
    dff = df
    for filter_part in filtering_expressions:
        col_name, operator, filter_value = split_filter_part(filter_part)

        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
        elif operator == 'contains':
            dff = dff.loc[dff[col_name].str.contains(filter_value)]
        elif operator == 'datestartswith':
            # this is a simplification of the front-end filtering logic,
            # only works with complete fields in standard format
            dff = dff.loc[dff[col_name].str.startswith(filter_value)]

    return dff.to_dict('records')

有效,但弹出一个错误:

AttributeError: 'NoneType' object has no attribute 'split'

这是不是因为我必须先用 if 语句检查此处传入的参数 "filter" 是否为 None?