无法将用户状态信息记录到对话记录(transcript)中

Unable to log user state information to transcript

我正在使用 TranscriptLoggerMiddleware 和 CosmosDB 来保存聊天机器人的对话记录(transcript)。我们希望把用户状态信息(用户名、帐号、帐户类型等)作为对话记录文档中的顶级属性保存下来,以便在数据库中轻松查询特定客户(如果这些信息只出现在文档中各个以时间戳命名的属性里,就无法对其进行查询)。

理想情况下,我会在构建文档时就添加用户状态,但我想不出任何办法来访问它,因为记录器是在 index.js 中定义的,而 TranscriptLoggerMiddleware 只向我的函数提供 activity,而不是完整的 context。如果有人知道如何通过 TranscriptLoggerMiddleware 获取用户状态数据,请告诉我,这样就能解决此问题。下面是 customLogger 的代码。请注意,由于该函数会同时收到用户的提问和机器人的回复,我无法通过“先读取对话记录(transcript)再重新保存”的方式让它正常工作,因此我直接用本地日志对象覆盖对话记录。我并不打算在这里另起一套新方法,但如果有人能从整体上解决这个问题,我也很想听听。

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

const { CosmosDbPartitionedStorage } = require('botbuilder-azure');
const path = require('path');

/**
 * CustomLogger keeps an in-memory transcript per conversation and mirrors it to
 * the `bot-transcripts` Cosmos DB container every time a message is logged.
 *
 * Each conversation/day is one document keyed `<year>-<month>-<day>_<conversation id>`:
 *   { userData: {}, botName: <BOTNAME>, "<epoch ms>": "<sender>: <text>", ... }
 *
 * NOTE: entries in `conversationLogger` are never evicted, so the cache grows
 * for as long as the process lives.
 */
class CustomLogger {
    /**
     * Set up Cosmos storage and the in-memory transcript cache.
     * @param appInsightsClient Application Insights client used to report failed writes.
     */
    constructor(appInsightsClient) {
        this.transcriptStorage = new CosmosDbPartitionedStorage({
            cosmosDbEndpoint: process.env.COSMOS_SERVICE_ENDPOINT,
            authKey: process.env.COSMOS_AUTH_KEY,
            databaseId: process.env.DATABASE,
            containerId: 'bot-transcripts'
        });

        // Accumulated transcript documents, keyed by document name.
        this.conversationLogger = {};

        this.appInsightsClient = appInsightsClient;

        // Pause (ms) inserted before each write; set to 0 to disable.
        this.msDelay = 250;

        // Tail of the pending-write chain for each document. Callers such as
        // TranscriptLoggerMiddleware do not await logActivity(), so writes are
        // chained here to guarantee they reach storage in logging order.
        this.writeQueues = {};
    }

    /**
     * Log a message activity to the conversation's transcript document.
     * Activities that are not of type `message`, or that carry no conversation,
     * are ignored.
     * @param activity Activity being logged.
     * @returns Promise that settles once this activity's write has been attempted.
     * @throws Error when no activity is supplied.
     */
    async logActivity(activity) {
        if (!activity) {
            throw new Error('Activity is required.');
        }

        // Log only if this is type message
        if (activity.type !== 'message' || !activity.conversation) {
            return;
        }

        // Prefer the text of the first attachment (e.g. a card); fall back to the
        // activity text when the attachment carries no text of its own.
        const firstAttachment = activity.attachments && activity.attachments[0];
        const attachmentText = firstAttachment && firstAttachment.content && firstAttachment.content.text;
        const messageText = typeof attachmentText === 'string' ? attachmentText : activity.text;
        const logTextDb = `${activity.from.name}: ${messageText}`;

        // Drop everything from the first '|' onwards in the conversation id.
        const id = activity.conversation.id.replace(/\|.*/, '');

        // Date stamp in local time. Deliberately NOT zero-padded: the result is
        // the document key, and existing documents/readers use this exact format.
        const currentDate = new Date();
        const datestamp = `${currentDate.getFullYear()}-${currentDate.getMonth() + 1}-${currentDate.getDate()}`;
        const fileName = `${datestamp}_${id}`;

        if (!(fileName in this.conversationLogger)) {
            this.conversationLogger[fileName] = {
                userData: {},
                botName: process.env.BOTNAME
            };
        }
        const entry = this.conversationLogger[fileName];

        // Messages are keyed by epoch milliseconds. Several activities can be
        // logged within one millisecond, so bump the key until it is unused
        // instead of overwriting the earlier message.
        let timestamp = Date.now();
        while (timestamp in entry) {
            timestamp += 1;
        }
        entry[timestamp] = logTextDb;

        // Chain this write behind any write still pending for the same document.
        const previousWrite = this.writeQueues[fileName] || Promise.resolve();
        const thisWrite = previousWrite.then(async () => {
            await this.wait(this.msDelay);

            try {
                // Snapshot at write time so the document includes every message logged so far.
                await this.transcriptStorage.write({ [fileName]: { ...entry } });
            } catch (err) {
                console.log(err);
                this.appInsightsClient.trackTrace({message: `Logger Error ${err.code} - ${path.basename(__filename)}`,severity: 3,properties: {'botName': process.env.BOTNAME, 'error':err.body}});
            }
        });

        // Keep the chain usable even if error reporting itself throws.
        this.writeQueues[fileName] = thisWrite.catch(() => {});

        await thisWrite;
    }

    /**
     * Pause without blocking the event loop.
     * @param milliseconds How long to wait.
     * @returns Promise that resolves after the delay.
     */
    wait(milliseconds) {
        return new Promise((resolve) => setTimeout(resolve, milliseconds));
    }
}
exports.CustomLogger = CustomLogger;

由于无法在此函数中获取用户状态,我决定尝试其他几种方法。其中最有希望的是创建一个单独的 “updateTranscript” 函数:读取对话记录(transcript)、添加用户状态,再把它保存回去。但我认为它只在用户发出请求时把数据写了进去,随后在机器人响应时又被本地对象覆盖了。我添加了一个延迟来尝试解决这个问题,但仍然不起作用。在我提供客户编号的第一个提示中,用户状态数据确实被存入了对话记录,但到下一个 activity 时它就消失了,并且再也不会回来(即使我可以看到它应该已被写入数据库)。下面是这个更新函数。

const { CosmosDbStorage } = require('botbuilder-azure');

/**
 * Copy the given user state into today's transcript document for the
 * conversation in `context`, under the top-level `userData` property.
 *
 * The document key is `<year>-<month>-<day>_<conversation id>`, built the same
 * way the transcript logger builds it. Storage failures are logged and reported
 * to Application Insights rather than thrown.
 *
 * @param context Turn context; `context.activity.conversation.id` selects the transcript.
 * @param userData User state object to store on the transcript.
 * @param appInsightsClient Application Insights client used to report failures.
 * @returns Promise that resolves once the update has been attempted.
 */
const updateTranscript = async (context, userData, appInsightsClient) => {
    const transcriptStorage = new CosmosDbStorage({
        serviceEndpoint: process.env.COSMOS_SERVICE_ENDPOINT,
        authKey: process.env.COSMOS_AUTH_KEY,
        databaseId: process.env.DATABASE,
        collectionId: 'bot-transcripts',
        partitionKey: process.env.BOTNAME
    });

    // Drop everything from the first '|' onwards in the conversation id.
    const id = context.activity.conversation.id.replace(/\|.*/, '');

    // Date stamp in local time; not zero-padded, to match the logger's document keys.
    const currentDate = new Date();
    const datestamp = `${currentDate.getFullYear()}-${currentDate.getMonth() + 1}-${currentDate.getDate()}`;
    const filename = `${datestamp}_${id}`;

    // Wait before reading so that writes already in flight have a chance to land.
    const msDelay = 500;
    await new Promise(resolve => setTimeout(resolve, msDelay));

    try {
        const transcript = (await transcriptStorage.read([filename])) || {};

        // The document may not exist yet (nothing logged so far); start from an
        // empty one instead of failing on an undefined entry.
        if (!transcript[filename]) {
            transcript[filename] = {};
        }
        transcript[filename]['userData'] = userData;

        await transcriptStorage.write(transcript);
        console.log('User data added to transcript');
    } catch(err) {
        console.log(err);
        appInsightsClient.trackTrace({message: `Log Updater Error ${err.code} - ${path.basename(__filename)}`,severity: 3,properties: {'botName': process.env.BOTNAME, 'error':err.body}});
    }
};

module.exports.updateTranscript = updateTranscript

我意识到这种方法有点混乱,但我找不到更好的办法。我知道 Microsoft 的 COVID-19 机器人有一个非常好的对话记录(transcript)检索功能,但我没能从他们那里得到任何关于其实现方式的信息。除此之外,如果有人能帮我弄清楚如何在不被覆盖、也不遇到并发问题的情况下把用户状态写入对话记录,我很乐意继续沿用这个实现。

至于为什么我无法通过 substring() 函数查询帐号,下面是一个文档数据对象的例子。我不知道应该检查哪个字符串属性的子字符串(在本例中帐号为 122809),因为我无法预知对应的时间戳键是什么。如果帐号存储在顶层(例如 userData/accountNumber),我就能确切地知道到哪里去查找该值。为了提供更多上下文,我还展示了在第一次提示中输入帐号后看到的内容,其中 userData 已被填充。但是它在随后的写入中被覆盖了,即使我的 updateTranscript 函数带有延迟,我似乎也无法把它找回来。

"document": {
        "userData": {},
        "botName": "AveryCreek_OEM_CSC_Bot_QA",
        "1594745997562": "AveryCreek_OEM_CSC_Bot_QA: Hi! I'm the OEM CSC Support Bot! Before we get started, can you please provide me with your 6-digit Vista number? If you don't have one, just type \"Skip\".",
        "1594746003973": "You: 122809",
        "1594746004241": "AveryCreek_OEM_CSC_Bot_QA: Thank you. What can I help you with today? \r\nYou can say **Menu** for a list of common commands, **Help** for chatbot tips, or choose one of the frequent actions below.  \r\n  \r\n  I'm still being tested, so please use our [Feedback Form](https://forms.office.com/Pages/ResponsePage.aspx?id=lVxS1ga5GkO5Jum1G6Q8xHnUJxcBMMdAqVUeyOmrhgBUNFI3VEhMU1laV1YwMUdFTkhYVzcwWk9DMiQlQCN0PWcu) to let us know how well I'm doing and how I can be improved!",
        "1594746011384": "You: what is my account number?",
        "1594746011652": "AveryCreek_OEM_CSC_Bot_QA: Here is the informaiton I have stored: \n   \n**Account Number:** 122809 \n\n I will forget everything except your account number after the end of this conversation.",
        "1594746011920": "AveryCreek_OEM_CSC_Bot_QA: I can clear your information if you don't want me to store it or if you want to reneter it. Would you like me to clear your information now?",
        "1594746016034": "You: no",
        "1594746016301": "AveryCreek_OEM_CSC_Bot_QA: OK, I won't clear your information. You can ask again at any time."
    },

"document": {
        "userData": {
            "accountNumber": "122809"
        },
        "botName": "AveryCreek_OEM_CSC_Bot_QA",
        "1594746019952": "AveryCreek_OEM_CSC_Bot_QA: Hi! I'm the OEM CSC Support Bot! What can I help you with today? \r\nYou can say **Menu** for a list of common commands, **Help** for chatbot tips, or choose one of the frequent actions below.  \r\n  \r\n  I'm still being tested, so please use our [Feedback Form](https://forms.office.com/Pages/ResponsePage.aspx?id=lVxS1ga5GkO5Jum1G6Q8xHnUJxcBMMdAqVUeyOmrhgBUNFI3VEhMU1laV1YwMUdFTkhYVzcwWk9DMiQlQCN0PWcu) to let us know how well I'm doing and how I can be improved!"
    },

您曾说过您遇到了并发问题,尽管 JavaScript 是单线程的。这听起来很奇怪,但我认为您在某种程度上是对的。TranscriptLoggerMiddleware 确实有自己的缓冲区,用于存储整个回合(turn)中的活动,然后尝试一次性把它们全部记录下来。它本可以很容易地提供一种让您在自己的记录器函数中拿到整个缓冲区的方法,但它只是循环遍历缓冲区,因此您仍然只能逐个记录这些活动。此外,它允许 logActivity 返回一个 Promise,但从不等待(await)它,因此每个 activity 都会被“同时”记录(并不是真正的同时,但代码可能会在各个函数调用之间来回切换,而不会先等它们完成)。对于任何非原子操作来说,这都是一个问题,因为您会在不知道状态最新修改的情况下去修改它。

// Drains the buffered `transcript` array from the front, handing each activity
// to the configured logger in turn. The logger's result is never awaited: a
// returned Promise only gets a rejection handler attached, so asynchronous
// logActivity() calls for successive activities can overlap.
while (transcript.length > 0) {
    try {
        const activity: Activity = transcript.shift();
        // If the implementation of this.logger.logActivity() is asynchronous, we don't
        // await it as to not block processing of activities.
        // Because TranscriptLogger.logActivity() returns void or Promise<void>, we capture
        // the result and see if it is a Promise.
        const logActivityResult = this.logger.logActivity(activity);

        // If this.logger.logActivity() returns a Promise, a catch is added in case there
        // is no innate error handling in the method. This catch prevents
        // UnhandledPromiseRejectionWarnings from being thrown and prints the error to the
        // console.
        if (logActivityResult instanceof Promise) {
            logActivityResult.catch(err => {
                this.transcriptLoggerErrorHandler(err);
            });
        }
    } catch (err) {
        // Synchronous failures from logActivity() go to the same error handler.
        this.transcriptLoggerErrorHandler(err);
    }
}

总而言之,我认为 TranscriptLoggerMiddleware 并不是解决这个问题的合适途径。虽然它的设计目的可能正是满足您的需求,但它存在太多问题。我会要么编写自己的中间件,要么直接把中间件代码放进机器人逻辑中,如下所示:

async onTurn(turnContext) {
    const activity = turnContext.activity;
    
    await this.logActivity(turnContext, activity);
    
    turnContext.onSendActivities(async (ctx, activities, next) => {
        for (const activity of activities) {
            await this.logActivity(ctx, activity);
        }
        
        return await next();
    });

    // Bot code here

    // Save state changes
    await this.userState.saveChanges(turnContext);
}

async logActivity(turnContext, activity) {
    var transcript = await this.transcriptProperty.get(turnContext, []);
    transcript.push(activity);
    await this.transcriptProperty.set(turnContext, transcript);
    console.log('Activities saved: ' + transcript.length);
}

由于您的对话记录(transcript)将存储在用户状态中,而用户状态里也有您需要的帐号,希望这样您就能够对其进行查询。

Kyle 的回答确实帮助我解决了问题,而且我认为对于遇到类似问题的任何人来说,这将是最可重用的部分。关键要点是,如果您使用的是 nodejs,则不应使用 TranscriptLoggerMiddleware,而应在 onTurn 处理程序中使用 Kyle 的函数(此处重复以供参考):

// Function provided by Kyle Delaney
async onTurn(turnContext) {
    const activity = turnContext.activity;
    
    await this.logActivity(turnContext, activity);
    
    turnContext.onSendActivities(async (ctx, activities, next) => {
        for (const activity of activities) {
            await this.logActivity(ctx, activity);
        }
        
        return await next();
    });

    // Bot code here

    // Save state changes
    await this.userState.saveChanges(turnContext);
}

不过需要注意,他的 logActivity 函数只是通过自定义的 transcriptProperty 把原始活动存储到用户状态中。到目前为止,我还没有找到一种好办法,让业务/管理员用户能以易于阅读和搜索的方式访问这些数据,也没有办法生成某种文件输出,以便发送给索取对话记录(transcript)的客户。因此,我继续使用我的 CustomLogger。下面是我的做法。

首先,您必须在构造函数中创建 transcriptLogger。如果在回合(turn)处理程序中创建它,您将丢失缓存/缓冲区,它只会包含最新的 activity,而不是完整的历史记录。这也许是常识,但还是让我栽了个小跟头。我通过 this.transcriptLogger = new CustomLogger(appInsightsClient); 在构造函数中完成这一步。我还修改了 logActivity 函数,使其接受 userData(我的状态对象)作为可选的第二个参数。借助这个 userData 对象,我已经成功地把所需的客户信息添加到了机器人的对话记录(transcript)中。要修改上面 Kyle 的函数,只需把 this.logActivity 替换为您自己的函数调用,在我的例子中是 this.transcriptLogger.logActivity(context, userData);。

虽然这种方法还有一些其他问题,但它确实解决了标题中的问题,即如何把用户状态数据放入对话记录(transcript)中。