excel 中的单元格和行的格式颜色使用 pandas

formatting colour for cell and row in excel using pandas

我的要求突出显示这些单元格与 "yellow" 的不同,添加的新行将在 excel 中以红色突出显示作为输出。

load1 数据集

StudentID   Visit1      Visit 2     Visit 3     Visit 4
1           16-04-20    23-04-20    30-04-20    07-05-20
2           17-04-20    24-04-20    01-05-20    08-05-20
3           18-04-20    25-04-20    02-05-20    09-05-20
4           19-04-20    26-04-20    03-05-20    10-05-20
5           20-04-20    27-04-20    04-05-20    11-05-20
6           21-04-20    28-04-20    05-05-20    12-05-20
7           22-04-20    29-04-20    06-05-20    13-05-20
8           23-04-20    30-04-20    07-05-20    14-05-20

load2table

StudentID   Visit1      Visit 2     Visit 3     Visit 4
1           16-04-20    23-04-20    30-04-20    07-05-20
2           17-04-20    24-04-20    01-05-20    08-05-20
3           18-04-20    25-09-20    02-05-20    09-05-20
4           19-04-20    26-04-20    03-06-20    10-05-20
5           20-04-20    27-04-20    04-05-20    11-05-20
6           21-04-20    28-04-20    05-05-20    12-06-20
7           22-04-20    29-08-20    06-05-20    13-05-20
8           23-04-20    30-04-20    07-05-20    14-05-20
9           15-05-20    16-05-20    17-05-20    18-05-20

输出。我正在寻找 excel 格式的输出,颜色高亮显示。

我是这个 pandas 的新手,我能够在数据帧级别获得不同的差异。不在输出的 excel 处使用 colours.Please 帮助格式化每个单元格和行。

以下是我试图找出差异的脚本。

import pandas as pd
import os
import numpy as np
colour1= pd.read_excel('pandas.xlsx',sheet_name='load1')
colour2= pd.read_excel('pandas.xlsx',sheet_name='load2')
colour_merge=colour1.merge(colour2,left_on='StudentID', right_on='StudentID',how='outer')
colour_merge['Visit1dif']= np.where(colour_merge['Visit1_x']==colour_merge['Visit1_y'],0,1)
colour_merge['Visit2dif']= np.where(colour_merge['Visit 2_x']==colour_merge['Visit 2_y'],0,1)
colour_merge['Visit3dif']= np.where(colour_merge['Visit 3_x']==colour_merge['Visit 3_y'],0,1)
colour_merge['Visit4dif']= np.where(colour_merge['Visit 4_x']==colour_merge['Visit 4_y'],0,1)
colour_merge[['StudentID','Visit1_x','Visit1_y','Visit1dif','Visit 2_x','Visit 2_y','Visit2dif','Visit 3_x','Visit 3_y','Visit3dif','Visit 4_x','Visit 4_y','Visit4dif']]

我想你有两个 sheets load1load2,并且想要和第三个显示在图片中。

为了添加新的 sheet,pandas.ExcelWriter 应该以 append 模式打开,并且需要 openpyxl 库以及 pandas.DataFrame.to_excelconditional_formatting 函数:

import pandas as pd
import openpyxl
import xlrd

from openpyxl.styles import Alignment, Font, NamedStyle, PatternFill, colors
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.formatting.rule import Rule, ColorScaleRule, CellIsRule, FormulaRule

file = r'C:\app\excel\pandas.xlsx'
xls = xlrd.open_workbook(file)
sht = xls.sheet_names()

colour1= pd.read_excel(file,sheet_name=sht[0])
colour2= pd.read_excel(file,sheet_name=sht[1])
colour_merge = colour1.merge(colour2,left_on='StudentID',right_on='StudentID',how='outer')
colour_merge = colour_merge[['StudentID','Visit 1_x','Visit 1_y','Visit 2_x','Visit 2_y','Visit 3_x','Visit 3_y','Visit 4_x','Visit 4_y']]

l_sid = []
mxc1 = max(colour1['StudentID'])+1
mxc2 = max(colour2['StudentID'])+1
for j in range(min(mxc1,mxc2),max(mxc1,mxc2)):
    l_sid.append(j+1)

writer_args = { 'path': file, 'mode': 'a', 'engine': 'openpyxl'}

with pd.ExcelWriter(**writer_args) as xlsx:
    colour_merge.to_excel(xlsx, 'load3', index=False)
    ws = xlsx.sheets['load3']
    mxc= ws.max_column


    title_row = '1'

    yellow = PatternFill(bgColor=colors.YELLOW)
    red    = PatternFill(bgColor=colors.RED)    
    i=1    
    while i <= mxc*2:
        l_col = []
        l_col.append(chr(i+65))
        l_col.append(chr(i+65+1))

        for j in range(2,mxc+1):
            for k in l_col:
                if j not in l_sid:
                    ws.conditional_formatting.add(k+str(j), FormulaRule(formula=[l_col[0]+'$'+str(j)+'<>'+l_col[1]+'$'+str(j)], stopIfTrue=True, fill=yellow))

        i+=2


    r = Rule(type="expression", dxf=DifferentialStyle(fill=red), stopIfTrue=False)
    r.formula = ['1=1']

    while l_sid:
        el=l_sid.pop(-1)        
        ws.conditional_formatting.add('A'+str(el)+':'+chr(64+mxc)+str(el), r) 


xlsx.save()