启动一个线程使主线程超时,但允许主线程暂停定时器
Starting a thread to timeout the main thread, but allow the main thread to pause the timer
我有一个 C 函数 expensive_call
,我想在其中添加一个 'timeout'。为此,我使用 pthreads:我创建了一个单独的线程,它调用 nanosleep
然后向主线程发送一个信号 (SIGUSR1
)。
但是,应该允许主线程将某些代码片段标记为不计入超时。所以我想到,主线程可以向定时器线程发送一个信号(SIGUSR2
)来暂停/恢复(pause/resume)定时器。
当主线程接收到 SIGUSR1
时,我使用 sigsetjmp
/siglongjmp
从昂贵的调用中返回。SIGUSR2
的信号处理程序为空。
我目前的实施有两个问题:
- 有时候收到
SIGUSR2
但是nanosleep
没有停止,反正expensive_call
就中断了。 (为此,我尝试在 expensive_call
中的 for (;;);
正上方添加 sched_yield();
以允许计时器线程接管,但这没有任何效果。)
- 此解决方案同时需要
SIGUSR1
和 SIGUSR2
,我认为我不必为此同时使用两者。
欢迎提出解决这些问题的任何想法!
下面程序的预期输出是:
[main thread] start expensive call
[timer thread] received SIGUSR2
[timer thread] pausing timer
(does not terminate)
但有时我们会得到(这是上面的问题 1):
[main thread] start expensive call
[timer thread] received SIGUSR2
[timer thread] killing main thread...
[main thread] received SIGUSR1
[main thread] expensive_call() was interrupted
程序本身:
#include <errno.h>
#include <pthread.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
/* Handles of the thread running main() and of the thread running timer(). */
static pthread_t main_thread,timer_thread;
/*
 * Jump target used to abandon expensive_call() when the timeout fires.
 * This must be a sigjmp_buf rather than a plain jmp_buf: sigsetjmp() and
 * siglongjmp() are specified for sigjmp_buf only, and the two types are
 * not guaranteed to have the same size.
 */
static sigjmp_buf restore_point;

/*
 * SIGUSR1 handler: reports the signal on stderr and jumps back to the
 * sigsetjmp() call that filled restore_point, which then returns `sig`.
 *
 * NOTE(review): fprintf() is not on the POSIX list of async-signal-safe
 * functions; it is kept here only for the demo output.
 */
static void handle_sigusr1 (int sig)
{
    fprintf (stderr,"received SIGUSR1\n");
    siglongjmp (restore_point,sig);
}
/*
 * SIGUSR2 handler: only reports on stderr that the signal arrived.
 * The signal number itself is not used.
 */
static void handle_sigusr2 (int sig)
{
    (void) sig;
    fputs ("received SIGUSR2\n",stderr);
}
/*
 * Timer thread entry point.
 *
 * Sleeps for a total of one second with nanosleep() and then sends SIGUSR1
 * to main_thread.  Whenever nanosleep() fails with EINTR the thread prints
 * "pausing timer", waits in sigwait() for SIGUSR2, prints
 * "continuing timer" and goes back to sleeping.  The same struct is passed
 * as both nanosleep() arguments, so the sleep resumes with the time that
 * nanosleep() reported as remaining.
 *
 * arg is not used.  Always returns NULL.
 *
 * NOTE(review): a SIGUSR2 delivered before nanosleep() has been entered
 * runs the handler without interrupting any sleep, so that pause request
 * is missed and the timeout still fires.
 * NOTE(review): SIGUSR2 is left unblocked in this thread but is also the
 * signal passed to sigwait(); POSIX expects the signals in the set to be
 * blocked when sigwait() is called -- confirm.
 */
static void *timer (void *arg)
{
struct timespec timeout;
sigset_t sigset;
/* Scratch output for pthread_setcanceltype() and sigwait(); never read. */
int _unused;
/* Allow pthread_cancel() to terminate this thread at any point. */
pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS,&_unused);
/* Block every signal in this thread except SIGUSR2, which is used in sigwait below */
sigfillset (&sigset);
sigdelset (&sigset,SIGUSR2);
pthread_sigmask (SIG_SETMASK,&sigset,NULL);
/* Reuse sigset as the one-element set {SIGUSR2} handed to sigwait(). */
sigemptyset (&sigset);
sigaddset (&sigset,SIGUSR2);
timeout.tv_sec=1;
timeout.tv_nsec=0;
/* On interrupt, wait for SIGUSR2, then continue to sleep */
while (nanosleep (&timeout,&timeout) == -1 && errno==EINTR){
fprintf (stderr,"pausing timer\n");
sigwait (&sigset,&_unused);
fprintf (stderr,"continuing timer\n");
}
/* The sleep finished (or failed with something other than EINTR). */
fprintf (stderr,"killing main thread...\n");
pthread_kill (main_thread,SIGUSR1);
return NULL;
}
/*
 * Stand-in for the long-running computation: announces itself, sends
 * SIGUSR2 to the timer thread and then spins forever.  The statements
 * after the loop are never reached.
 */
static void expensive_call (void)
{
    fputs ("start expensive call\n",stderr);
    pthread_kill (timer_thread,SIGUSR2);
    while (1) {
        /* busy-wait; only a signal handler can get us out of here */
    }
    pthread_kill (timer_thread,SIGUSR2);
    fputs ("end expensive call\n",stderr);
}
/*
 * Program entry point.
 *
 * Installs the SIGUSR1 and SIGUSR2 handlers, starts the timer thread and
 * runs expensive_call() guarded by sigsetjmp(), so that the SIGUSR1
 * handler can abort the call.  Afterwards the timer thread is cancelled
 * and joined.
 *
 * Returns 0 normally and 1 when the timer thread cannot be created.
 */
int main (void)
{
    struct sigaction signal_handler = {0};
    int err;

    /* Install signal handlers */
    signal_handler.sa_handler=handle_sigusr1;
    sigemptyset (&signal_handler.sa_mask);
    signal_handler.sa_flags=SA_RESTART;
    if (sigaction (SIGUSR1,&signal_handler,NULL)!=0)
        perror ("sigaction");
    signal_handler.sa_handler=handle_sigusr2;
    if (sigaction (SIGUSR2,&signal_handler,NULL)!=0)
        perror ("sigaction");

    /* Setup threads */
    main_thread=pthread_self();
    err=pthread_create (&timer_thread,NULL,timer,NULL);
    if (err!=0){
        /* pthread functions return the error number instead of setting errno.
           Without a timer thread, timer_thread must not be used below. */
        errno=err;
        perror ("pthread_create");
        return 1;
    }

    /* Actual computation */
    if (sigsetjmp (restore_point,1)!=0){
        /* Reached through siglongjmp() in the SIGUSR1 handler. */
        fprintf (stderr,"expensive_call() was interrupted\n");
    } else {
        expensive_call();
    }

    /* Cleanup */
    pthread_cancel (timer_thread);
    pthread_join (timer_thread,NULL);
    return 0;
}
我不清楚您为什么不使用信号量等实现概念。
信号量是在线程之间发出信号的一种常用且最简单的方法。但它要求您为信号量编写检查点以触发函数。
编辑:
分解信号量可以看作是一个全局变量(或者至少在所涉及线程的代码范围内)。虽然真正的目的是表明某个部分已准备好让另一个线程处理它,但您可以只使用其中的一小部分:全局变量。
据我了解,您的方法是尝试实现一种软件看门狗。如果看门狗用完特定时间以检测是否存在未定义的软件状态或死锁,则看门狗基本上会重置。因此,我会采用简单的解决方案来设置一个全局变量,您可以接受(带有一些安全缓冲区)的时间,并让计时器线程倒计时。如果它达到 0,则终止主线程。
要暂停或跳过您无法控制但值得信赖的昂贵调用,我会将时间设置为最大值并终止线程。在昂贵的调用返回后恢复重新创建。
我已经根据 sbo 的建议,使用互斥锁和条件变量实现了一个解决方案。条件变量用于双向发信号。
主线程启动定时器线程,等待条件变量。这允许定时器线程启动,解决了我的第一个问题。
计时器线程向条件变量发出信号并执行 pthread_cond_timedwait()
等待至多超时。
如果主线程及时结束,它会取消定时器线程。否则,计时器线程会在 ETIMEDOUT
发生时杀死主线程。
为了允许 expensive_call()
中的部分不计入超时,主线程在进入这样的部分之前和退出之后向条件变量发出信号。计时器线程收到此信号后,计算剩余超时时间,并在主线程退出未计数部分后继续执行步骤 2。
#include <errno.h>
#include <pthread.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
/* Handles of the thread running main() and of the thread running timer(). */
static pthread_t main_thread,timer_thread;
/* Mutex passed to every wait on cv. */
static pthread_mutex_t mutex=PTHREAD_MUTEX_INITIALIZER;
/* Condition variable used for signalling in both directions between the
   main thread and the timer thread; initialised at run time in main(). */
static pthread_cond_t cv;

/* Returns a+b with tv_nsec brought back into [0, 1e9). */
static struct timespec ts_add (struct timespec a,struct timespec b)
{
    struct timespec sum;

    sum.tv_sec=a.tv_sec+b.tv_sec;
    sum.tv_nsec=a.tv_nsec+b.tv_nsec;
    if (sum.tv_nsec>=1000000000L){
        sum.tv_sec++;
        sum.tv_nsec-=1000000000L;
    }
    return sum;
}

/* Returns a-b with tv_nsec normalised; yields zero when b is later than a. */
static struct timespec ts_sub (struct timespec a,struct timespec b)
{
    struct timespec diff;

    diff.tv_sec=a.tv_sec-b.tv_sec;
    diff.tv_nsec=a.tv_nsec-b.tv_nsec;
    if (diff.tv_nsec<0){
        diff.tv_sec--;
        diff.tv_nsec+=1000000000L;
    }
    if (diff.tv_sec<0){
        diff.tv_sec=0;
        diff.tv_nsec=0;
    }
    return diff;
}

/*
 * Timer thread entry point.
 *
 * Gives the main thread a budget of one second (measured on
 * CLOCK_MONOTONIC) and sends it SIGUSR1 when the budget is used up.
 * Each loop iteration signals cv and then waits on cv until the deadline:
 *   - a wake-up before the deadline is taken as "main thread entered an
 *     uncounted section": the remaining budget is computed, cv is
 *     signalled as acknowledgement, and the thread waits on cv without a
 *     deadline until it is signalled again; a fresh deadline is then
 *     derived from the remaining budget;
 *   - ETIMEDOUT ends the loop and SIGUSR1 is sent to main_thread;
 *   - any other error is reported and ends the loop without a signal.
 *
 * arg is not used.  Always returns NULL.
 *
 * NOTE(review): the waits have no predicate, so a spurious wake-up is
 * indistinguishable from a real signal.
 * NOTE(review): if this thread is cancelled while blocked in a wait, the
 * mutex is re-acquired before the thread terminates and is never released.
 */
static void *timer (void *arg)
{
    struct timespec start_time,wait_time,end_time,now;
    int oldtype,err;
    int timed_out=0;

    (void) arg;
    pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS,&oldtype);
    wait_time.tv_sec=1;
    wait_time.tv_nsec=0;
    /* The condition waits below require the mutex to be held by the caller. */
    pthread_mutex_lock (&mutex);
    clock_gettime (CLOCK_MONOTONIC,&start_time);
    end_time=ts_add (start_time,wait_time);
    for (;;)
    {
        pthread_cond_signal (&cv);
        err=pthread_cond_timedwait (&cv,&mutex,&end_time);
        if (err==0){
            /* Pause: keep only the part of the budget not yet consumed. */
            clock_gettime (CLOCK_MONOTONIC,&now);
            wait_time=ts_sub (wait_time,ts_sub (now,start_time));
            pthread_cond_signal (&cv);
            pthread_cond_wait (&cv,&mutex);
            /* Resume: restart the clock with the remaining budget. */
            clock_gettime (CLOCK_MONOTONIC,&start_time);
            end_time=ts_add (start_time,wait_time);
            continue;
        }
        if (err==ETIMEDOUT){
            fprintf (stderr,"killing main thread...\n");
            timed_out=1;
        } else {
            /* pthread functions return the error number instead of setting
               errno; stop here rather than retrying in a tight loop. */
            errno=err;
            perror ("pthread_cond_timedwait");
        }
        break;
    }
    pthread_mutex_unlock (&mutex);
    if (timed_out)
        pthread_kill (main_thread,SIGUSR1);
    return NULL;
}
/*
 * Called by the main thread before code that must not count towards the
 * timeout: signals cv and then blocks on cv until it is signalled back.
 *
 * The mutex is taken before signalling because pthread_cond_wait() must
 * be called with the mutex locked by the calling thread.  Holding it
 * across signal + wait also means a reply sent by a thread that itself
 * holds the mutex cannot arrive before this thread is actually waiting.
 */
static void enter_uncounted_section (void)
{
    pthread_mutex_lock (&mutex);
    pthread_cond_signal (&cv);
    pthread_cond_wait (&cv,&mutex);
    pthread_mutex_unlock (&mutex);
}
/*
 * Called by the main thread after code that must not count towards the
 * timeout: signals cv and then blocks on cv until it is signalled back.
 *
 * The mutex is taken before signalling because pthread_cond_wait() must
 * be called with the mutex locked by the calling thread.  Holding it
 * across signal + wait also means a reply sent by a thread that itself
 * holds the mutex cannot arrive before this thread is actually waiting.
 */
static void exit_uncounted_section (void)
{
    pthread_mutex_lock (&mutex);
    pthread_cond_signal (&cv);
    pthread_cond_wait (&cv,&mutex);
    pthread_mutex_unlock (&mutex);
}
/*
 * Stand-in for the long-running computation.  Runs one busy loop inside
 * an uncounted section (bracketed by enter_uncounted_section() and
 * exit_uncounted_section()) and a second busy loop outside of it, with
 * progress messages on stderr.
 *
 * The loop counters are volatile: an empty loop has no side effects, so
 * an optimising compiler may otherwise remove it and the demo would no
 * longer spend any time here.
 */
static void expensive_call (void)
{
    fprintf (stderr,"start expensive call\n");
    enter_uncounted_section();
    for (volatile long long int i=0; i<1000000000LL; i++)
        ;
    exit_uncounted_section();
    fprintf (stderr,"exited uncounted section\n");
    for (volatile long long int i=0; i<1000000000LL; i++)
        ;
    fprintf (stderr,"end expensive call\n");
}
/*
 * Jump target used to abandon expensive_call() when the timeout fires.
 * This must be a sigjmp_buf rather than a plain jmp_buf: sigsetjmp() and
 * siglongjmp() are specified for sigjmp_buf only, and the two types are
 * not guaranteed to have the same size.
 */
static sigjmp_buf restore_point;

/*
 * SIGUSR1 handler: reports the signal on stderr and jumps back to the
 * sigsetjmp() call that filled restore_point, which then returns `sig`.
 *
 * NOTE(review): fprintf() is not on the POSIX list of async-signal-safe
 * functions; it is kept here only for the demo output.
 */
static void handle_sigusr1 (int sig)
{
    fprintf (stderr,"received SIGUSR1\n");
    siglongjmp (restore_point,sig);
}
/*
 * Program entry point.
 *
 * Installs the SIGUSR1 handler, initialises cv to use CLOCK_MONOTONIC,
 * starts the timer thread, waits on cv for the timer's first signal and
 * then runs expensive_call() guarded by sigsetjmp(), so that the SIGUSR1
 * handler can abort the call.  Afterwards the timer thread is cancelled
 * and joined and the synchronisation objects are destroyed.
 *
 * Returns 0 normally and 1 when the timer thread cannot be created.
 *
 * NOTE(review): the mutex may still be locked at pthread_mutex_destroy()
 * time (timer cancelled inside a wait, or this thread jumped out of a
 * wait via siglongjmp) -- confirm this is acceptable for the demo.
 */
int main (void)
{
    struct sigaction signal_handler = {0};
    pthread_condattr_t attr;
    int err;

    /* Install signal handlers */
    signal_handler.sa_handler=handle_sigusr1;
    sigemptyset (&signal_handler.sa_mask);
    signal_handler.sa_flags=SA_RESTART;
    if (sigaction (SIGUSR1,&signal_handler,NULL)!=0)
        perror ("sigaction");

    /* Setup threads */
    pthread_condattr_init (&attr);
    pthread_condattr_setclock (&attr,CLOCK_MONOTONIC);
    pthread_cond_init (&cv,&attr);
    /* The attribute object is no longer needed once cv is initialised. */
    pthread_condattr_destroy (&attr);
    main_thread=pthread_self();
    /* pthread_cond_wait() below must be called with the mutex locked by
       this thread, so take it before the timer thread exists. */
    pthread_mutex_lock (&mutex);
    err=pthread_create (&timer_thread,NULL,timer,NULL);
    if (err!=0){
        /* pthread functions return the error number instead of setting errno.
           Without a timer thread, timer_thread must not be used below. */
        errno=err;
        perror ("pthread_create");
        pthread_mutex_unlock (&mutex);
        pthread_cond_destroy (&cv);
        pthread_mutex_destroy (&mutex);
        return 1;
    }

    /* Actual computation */
    if (sigsetjmp (restore_point,1)!=0){
        /* Reached through siglongjmp() in the SIGUSR1 handler. */
        fprintf (stderr,"expensive_call() was interrupted\n");
    } else {
        /* Wait for the timer thread's first signal before starting. */
        pthread_cond_wait (&cv,&mutex);
        pthread_mutex_unlock (&mutex);
        expensive_call();
    }

    /* Cleanup */
    pthread_cancel (timer_thread);
    pthread_join (timer_thread,NULL);
    pthread_cond_destroy (&cv);
    pthread_mutex_destroy (&mutex);
    return 0;
}
我有一个 C 函数 expensive_call
,我想在其中添加一个 'timeout'。为此,我使用 pthreads:我创建了一个单独的线程,它调用 nanosleep
然后向主线程发送一个信号 (SIGUSR1
)。
但是,应该允许主线程将某些代码片段标记为不计入超时。所以我想到,主线程可以向定时器线程发送一个信号(SIGUSR2
)来暂停/恢复(pause/resume)定时器。
当主线程接收到 SIGUSR1
时,我使用 sigsetjmp
/siglongjmp
从昂贵的调用中返回。SIGUSR2
的信号处理程序为空。
我目前的实施有两个问题:
- 有时候收到
SIGUSR2
但是nanosleep
没有停止,反正expensive_call
就中断了。 (为此,我尝试在expensive_call
中的for (;;);
正上方添加sched_yield();
以允许计时器线程接管,但这没有任何效果。) - 此解决方案同时需要
SIGUSR1
和SIGUSR2
,我认为我不必为此同时使用两者。
欢迎提出解决这些问题的任何想法!
下面程序的预期输出是:
[main thread] start expensive call
[timer thread] received SIGUSR2
[timer thread] pausing timer
(does not terminate)
但有时我们会得到(这是上面的问题 1):
[main thread] start expensive call
[timer thread] received SIGUSR2
[timer thread] killing main thread...
[main thread] received SIGUSR1
[main thread] expensive_call() was interrupted
程序本身:
#include <errno.h>
#include <pthread.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
/* Handles of the thread running main() and of the thread running timer(). */
static pthread_t main_thread,timer_thread;
/*
 * Jump target used to abandon expensive_call() when the timeout fires.
 * This must be a sigjmp_buf rather than a plain jmp_buf: sigsetjmp() and
 * siglongjmp() are specified for sigjmp_buf only, and the two types are
 * not guaranteed to have the same size.
 */
static sigjmp_buf restore_point;

/*
 * SIGUSR1 handler: reports the signal on stderr and jumps back to the
 * sigsetjmp() call that filled restore_point, which then returns `sig`.
 *
 * NOTE(review): fprintf() is not on the POSIX list of async-signal-safe
 * functions; it is kept here only for the demo output.
 */
static void handle_sigusr1 (int sig)
{
    fprintf (stderr,"received SIGUSR1\n");
    siglongjmp (restore_point,sig);
}
/*
 * SIGUSR2 handler: only reports on stderr that the signal arrived.
 * The signal number itself is not used.
 */
static void handle_sigusr2 (int sig)
{
    (void) sig;
    fputs ("received SIGUSR2\n",stderr);
}
/*
 * Timer thread entry point.
 *
 * Sleeps for a total of one second with nanosleep() and then sends SIGUSR1
 * to main_thread.  Whenever nanosleep() fails with EINTR the thread prints
 * "pausing timer", waits in sigwait() for SIGUSR2, prints
 * "continuing timer" and goes back to sleeping.  The same struct is passed
 * as both nanosleep() arguments, so the sleep resumes with the time that
 * nanosleep() reported as remaining.
 *
 * arg is not used.  Always returns NULL.
 *
 * NOTE(review): a SIGUSR2 delivered before nanosleep() has been entered
 * runs the handler without interrupting any sleep, so that pause request
 * is missed and the timeout still fires.
 * NOTE(review): SIGUSR2 is left unblocked in this thread but is also the
 * signal passed to sigwait(); POSIX expects the signals in the set to be
 * blocked when sigwait() is called -- confirm.
 */
static void *timer (void *arg)
{
struct timespec timeout;
sigset_t sigset;
/* Scratch output for pthread_setcanceltype() and sigwait(); never read. */
int _unused;
/* Allow pthread_cancel() to terminate this thread at any point. */
pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS,&_unused);
/* Block every signal in this thread except SIGUSR2, which is used in sigwait below */
sigfillset (&sigset);
sigdelset (&sigset,SIGUSR2);
pthread_sigmask (SIG_SETMASK,&sigset,NULL);
/* Reuse sigset as the one-element set {SIGUSR2} handed to sigwait(). */
sigemptyset (&sigset);
sigaddset (&sigset,SIGUSR2);
timeout.tv_sec=1;
timeout.tv_nsec=0;
/* On interrupt, wait for SIGUSR2, then continue to sleep */
while (nanosleep (&timeout,&timeout) == -1 && errno==EINTR){
fprintf (stderr,"pausing timer\n");
sigwait (&sigset,&_unused);
fprintf (stderr,"continuing timer\n");
}
/* The sleep finished (or failed with something other than EINTR). */
fprintf (stderr,"killing main thread...\n");
pthread_kill (main_thread,SIGUSR1);
return NULL;
}
/*
 * Stand-in for the long-running computation: announces itself, sends
 * SIGUSR2 to the timer thread and then spins forever.  The statements
 * after the loop are never reached.
 */
static void expensive_call (void)
{
    fputs ("start expensive call\n",stderr);
    pthread_kill (timer_thread,SIGUSR2);
    while (1) {
        /* busy-wait; only a signal handler can get us out of here */
    }
    pthread_kill (timer_thread,SIGUSR2);
    fputs ("end expensive call\n",stderr);
}
/*
 * Program entry point.
 *
 * Installs the SIGUSR1 and SIGUSR2 handlers, starts the timer thread and
 * runs expensive_call() guarded by sigsetjmp(), so that the SIGUSR1
 * handler can abort the call.  Afterwards the timer thread is cancelled
 * and joined.
 *
 * Returns 0 normally and 1 when the timer thread cannot be created.
 */
int main (void)
{
    struct sigaction signal_handler = {0};
    int err;

    /* Install signal handlers */
    signal_handler.sa_handler=handle_sigusr1;
    sigemptyset (&signal_handler.sa_mask);
    signal_handler.sa_flags=SA_RESTART;
    if (sigaction (SIGUSR1,&signal_handler,NULL)!=0)
        perror ("sigaction");
    signal_handler.sa_handler=handle_sigusr2;
    if (sigaction (SIGUSR2,&signal_handler,NULL)!=0)
        perror ("sigaction");

    /* Setup threads */
    main_thread=pthread_self();
    err=pthread_create (&timer_thread,NULL,timer,NULL);
    if (err!=0){
        /* pthread functions return the error number instead of setting errno.
           Without a timer thread, timer_thread must not be used below. */
        errno=err;
        perror ("pthread_create");
        return 1;
    }

    /* Actual computation */
    if (sigsetjmp (restore_point,1)!=0){
        /* Reached through siglongjmp() in the SIGUSR1 handler. */
        fprintf (stderr,"expensive_call() was interrupted\n");
    } else {
        expensive_call();
    }

    /* Cleanup */
    pthread_cancel (timer_thread);
    pthread_join (timer_thread,NULL);
    return 0;
}
我不清楚您为什么不使用信号量等实现概念。
信号量是在线程之间发出信号的一种常用且最简单的方法。但它要求您为信号量编写检查点以触发函数。
编辑: 分解信号量可以看作是一个全局变量(或者至少在所涉及线程的代码范围内)。虽然真正的目的是表明某个部分已准备好让另一个线程处理它,但您可以只使用其中的一小部分:全局变量。
据我了解,您的方法是尝试实现一种软件看门狗。如果看门狗用完特定时间以检测是否存在未定义的软件状态或死锁,则看门狗基本上会重置。因此,我会采用简单的解决方案来设置一个全局变量,您可以接受(带有一些安全缓冲区)的时间,并让计时器线程倒计时。如果它达到 0,则终止主线程。
要暂停或跳过您无法控制但值得信赖的昂贵调用,我会将时间设置为最大值并终止线程。在昂贵的调用返回后恢复重新创建。
我已经根据 sbo 的建议,使用互斥锁和条件变量实现了一个解决方案。条件变量用于双向发信号。
主线程启动定时器线程,等待条件变量。这允许定时器线程启动,解决了我的第一个问题。
计时器线程向条件变量发出信号并执行
pthread_cond_timedwait()
等待至多超时。如果主线程及时结束,它会取消定时器线程。否则,计时器线程会在
ETIMEDOUT
发生时杀死主线程。为了允许
expensive_call()
中的部分不计入超时,主线程在进入这样的部分之前和退出之后向条件变量发出信号。计时器线程收到此信号后,计算剩余超时时间,并在主线程退出未计数部分后继续执行步骤 2。
#include <errno.h>
#include <pthread.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
/* Handles of the thread running main() and of the thread running timer(). */
static pthread_t main_thread,timer_thread;
/* Mutex passed to every wait on cv. */
static pthread_mutex_t mutex=PTHREAD_MUTEX_INITIALIZER;
/* Condition variable used for signalling in both directions between the
   main thread and the timer thread; initialised at run time in main(). */
static pthread_cond_t cv;

/* Returns a+b with tv_nsec brought back into [0, 1e9). */
static struct timespec ts_add (struct timespec a,struct timespec b)
{
    struct timespec sum;

    sum.tv_sec=a.tv_sec+b.tv_sec;
    sum.tv_nsec=a.tv_nsec+b.tv_nsec;
    if (sum.tv_nsec>=1000000000L){
        sum.tv_sec++;
        sum.tv_nsec-=1000000000L;
    }
    return sum;
}

/* Returns a-b with tv_nsec normalised; yields zero when b is later than a. */
static struct timespec ts_sub (struct timespec a,struct timespec b)
{
    struct timespec diff;

    diff.tv_sec=a.tv_sec-b.tv_sec;
    diff.tv_nsec=a.tv_nsec-b.tv_nsec;
    if (diff.tv_nsec<0){
        diff.tv_sec--;
        diff.tv_nsec+=1000000000L;
    }
    if (diff.tv_sec<0){
        diff.tv_sec=0;
        diff.tv_nsec=0;
    }
    return diff;
}

/*
 * Timer thread entry point.
 *
 * Gives the main thread a budget of one second (measured on
 * CLOCK_MONOTONIC) and sends it SIGUSR1 when the budget is used up.
 * Each loop iteration signals cv and then waits on cv until the deadline:
 *   - a wake-up before the deadline is taken as "main thread entered an
 *     uncounted section": the remaining budget is computed, cv is
 *     signalled as acknowledgement, and the thread waits on cv without a
 *     deadline until it is signalled again; a fresh deadline is then
 *     derived from the remaining budget;
 *   - ETIMEDOUT ends the loop and SIGUSR1 is sent to main_thread;
 *   - any other error is reported and ends the loop without a signal.
 *
 * arg is not used.  Always returns NULL.
 *
 * NOTE(review): the waits have no predicate, so a spurious wake-up is
 * indistinguishable from a real signal.
 * NOTE(review): if this thread is cancelled while blocked in a wait, the
 * mutex is re-acquired before the thread terminates and is never released.
 */
static void *timer (void *arg)
{
    struct timespec start_time,wait_time,end_time,now;
    int oldtype,err;
    int timed_out=0;

    (void) arg;
    pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS,&oldtype);
    wait_time.tv_sec=1;
    wait_time.tv_nsec=0;
    /* The condition waits below require the mutex to be held by the caller. */
    pthread_mutex_lock (&mutex);
    clock_gettime (CLOCK_MONOTONIC,&start_time);
    end_time=ts_add (start_time,wait_time);
    for (;;)
    {
        pthread_cond_signal (&cv);
        err=pthread_cond_timedwait (&cv,&mutex,&end_time);
        if (err==0){
            /* Pause: keep only the part of the budget not yet consumed. */
            clock_gettime (CLOCK_MONOTONIC,&now);
            wait_time=ts_sub (wait_time,ts_sub (now,start_time));
            pthread_cond_signal (&cv);
            pthread_cond_wait (&cv,&mutex);
            /* Resume: restart the clock with the remaining budget. */
            clock_gettime (CLOCK_MONOTONIC,&start_time);
            end_time=ts_add (start_time,wait_time);
            continue;
        }
        if (err==ETIMEDOUT){
            fprintf (stderr,"killing main thread...\n");
            timed_out=1;
        } else {
            /* pthread functions return the error number instead of setting
               errno; stop here rather than retrying in a tight loop. */
            errno=err;
            perror ("pthread_cond_timedwait");
        }
        break;
    }
    pthread_mutex_unlock (&mutex);
    if (timed_out)
        pthread_kill (main_thread,SIGUSR1);
    return NULL;
}
/*
 * Called by the main thread before code that must not count towards the
 * timeout: signals cv and then blocks on cv until it is signalled back.
 *
 * The mutex is taken before signalling because pthread_cond_wait() must
 * be called with the mutex locked by the calling thread.  Holding it
 * across signal + wait also means a reply sent by a thread that itself
 * holds the mutex cannot arrive before this thread is actually waiting.
 */
static void enter_uncounted_section (void)
{
    pthread_mutex_lock (&mutex);
    pthread_cond_signal (&cv);
    pthread_cond_wait (&cv,&mutex);
    pthread_mutex_unlock (&mutex);
}
/*
 * Called by the main thread after code that must not count towards the
 * timeout: signals cv and then blocks on cv until it is signalled back.
 *
 * The mutex is taken before signalling because pthread_cond_wait() must
 * be called with the mutex locked by the calling thread.  Holding it
 * across signal + wait also means a reply sent by a thread that itself
 * holds the mutex cannot arrive before this thread is actually waiting.
 */
static void exit_uncounted_section (void)
{
    pthread_mutex_lock (&mutex);
    pthread_cond_signal (&cv);
    pthread_cond_wait (&cv,&mutex);
    pthread_mutex_unlock (&mutex);
}
/*
 * Stand-in for the long-running computation.  Runs one busy loop inside
 * an uncounted section (bracketed by enter_uncounted_section() and
 * exit_uncounted_section()) and a second busy loop outside of it, with
 * progress messages on stderr.
 *
 * The loop counters are volatile: an empty loop has no side effects, so
 * an optimising compiler may otherwise remove it and the demo would no
 * longer spend any time here.
 */
static void expensive_call (void)
{
    fprintf (stderr,"start expensive call\n");
    enter_uncounted_section();
    for (volatile long long int i=0; i<1000000000LL; i++)
        ;
    exit_uncounted_section();
    fprintf (stderr,"exited uncounted section\n");
    for (volatile long long int i=0; i<1000000000LL; i++)
        ;
    fprintf (stderr,"end expensive call\n");
}
/*
 * Jump target used to abandon expensive_call() when the timeout fires.
 * This must be a sigjmp_buf rather than a plain jmp_buf: sigsetjmp() and
 * siglongjmp() are specified for sigjmp_buf only, and the two types are
 * not guaranteed to have the same size.
 */
static sigjmp_buf restore_point;

/*
 * SIGUSR1 handler: reports the signal on stderr and jumps back to the
 * sigsetjmp() call that filled restore_point, which then returns `sig`.
 *
 * NOTE(review): fprintf() is not on the POSIX list of async-signal-safe
 * functions; it is kept here only for the demo output.
 */
static void handle_sigusr1 (int sig)
{
    fprintf (stderr,"received SIGUSR1\n");
    siglongjmp (restore_point,sig);
}
/*
 * Program entry point.
 *
 * Installs the SIGUSR1 handler, initialises cv to use CLOCK_MONOTONIC,
 * starts the timer thread, waits on cv for the timer's first signal and
 * then runs expensive_call() guarded by sigsetjmp(), so that the SIGUSR1
 * handler can abort the call.  Afterwards the timer thread is cancelled
 * and joined and the synchronisation objects are destroyed.
 *
 * Returns 0 normally and 1 when the timer thread cannot be created.
 *
 * NOTE(review): the mutex may still be locked at pthread_mutex_destroy()
 * time (timer cancelled inside a wait, or this thread jumped out of a
 * wait via siglongjmp) -- confirm this is acceptable for the demo.
 */
int main (void)
{
    struct sigaction signal_handler = {0};
    pthread_condattr_t attr;
    int err;

    /* Install signal handlers */
    signal_handler.sa_handler=handle_sigusr1;
    sigemptyset (&signal_handler.sa_mask);
    signal_handler.sa_flags=SA_RESTART;
    if (sigaction (SIGUSR1,&signal_handler,NULL)!=0)
        perror ("sigaction");

    /* Setup threads */
    pthread_condattr_init (&attr);
    pthread_condattr_setclock (&attr,CLOCK_MONOTONIC);
    pthread_cond_init (&cv,&attr);
    /* The attribute object is no longer needed once cv is initialised. */
    pthread_condattr_destroy (&attr);
    main_thread=pthread_self();
    /* pthread_cond_wait() below must be called with the mutex locked by
       this thread, so take it before the timer thread exists. */
    pthread_mutex_lock (&mutex);
    err=pthread_create (&timer_thread,NULL,timer,NULL);
    if (err!=0){
        /* pthread functions return the error number instead of setting errno.
           Without a timer thread, timer_thread must not be used below. */
        errno=err;
        perror ("pthread_create");
        pthread_mutex_unlock (&mutex);
        pthread_cond_destroy (&cv);
        pthread_mutex_destroy (&mutex);
        return 1;
    }

    /* Actual computation */
    if (sigsetjmp (restore_point,1)!=0){
        /* Reached through siglongjmp() in the SIGUSR1 handler. */
        fprintf (stderr,"expensive_call() was interrupted\n");
    } else {
        /* Wait for the timer thread's first signal before starting. */
        pthread_cond_wait (&cv,&mutex);
        pthread_mutex_unlock (&mutex);
        expensive_call();
    }

    /* Cleanup */
    pthread_cancel (timer_thread);
    pthread_join (timer_thread,NULL);
    pthread_cond_destroy (&cv);
    pthread_mutex_destroy (&mutex);
    return 0;
}