计算多项选择题的正确数

Calculating number of correct of multiple choice questions

我有学生回答问题的数据。格式是这样的

Student     Q1   Q2  Q3    Q4
A            1   3   2    3
B            2   3   2    2    
C            1   2   1    2
D            3   3   1    2

对于这个例子,假设问题 1 的正确答案是 1,问题 2、3、4 的正确答案都是 2。

我如何生成一张统计表(table),显示每个学生答对了多少道题?在上面的示例中,它应显示类似下面的内容:

Student    Answered Correct:
A          2/4

首先创建变量 num_questions 并将其设置为问题数。然后为每个问题编写一条 if-then-else 语句,创建二进制标志变量(例如 Correct_Q1)来标记该题是否答对。使用 sum(of Correct:) 得到每个学生答对的题目总数;其中 Correct: 引用所有以 'Correct' 开头的变量名。

/* Score each student against a hard-coded answer key.
   Input : data set HAVE with Student and numeric answers Q1-Q4.
   Output: data set WANT with the score as "n/total" text
           (Answered_Correct) and as a fraction shown as a percentage
           (Answered_Correct_pct); WANT is then printed with labels. */
data want;
    set have;

    /* Declare the text result as character before it is first assigned;
       8 bytes is ample for values such as "4/4". */
    length Answered_Correct $ 8;
    format Answered_Correct_pct percent.;

    num_questions = 4;

    /* One 0/1 flag per question; the constant on the right is the key. */
    if Q1 = 1 then Correct_Q1 = 1; else Correct_Q1 = 0;
    if Q2 = 2 then Correct_Q2 = 1; else Correct_Q2 = 0;
    if Q3 = 2 then Correct_Q3 = 1; else Correct_Q3 = 0;
    if Q4 = 2 then Correct_Q4 = 1; else Correct_Q4 = 0;

    /* "Correct:" is a name-prefix list: every variable defined so far whose
       name starts with "Correct", i.e. the four flags above.
       PUT needs a real numeric format (8.); COMPRESS then removes the
       padding blanks so the result reads "2/4". */
    Answered_Correct = compress(put(sum(of Correct:), 8.) || '/' || put(num_questions, 8.));
    Answered_Correct_pct = sum(of Correct:) / num_questions;

    label Student = 'Student' Answered_Correct = 'Answered correct' Answered_Correct_pct = 'Answered correct (%)';
    keep Student Answered_Correct Answered_Correct_pct;
run;

proc print data=want noobs label;
run;

如果您只有四个问题,最快的解决方案可能是只使用条件语句:if Q1 = 1 then answer + 1;

对于使用 lookup/answer table 的更通用的解决方案:

先转置数据,再合并答案 table,最后按学生汇总。

/* Wide-format sample: one row per student, one numeric column per question.
   MISSOVER leaves trailing variables missing instead of reading the next
   line when a record is short. */
data broad_data;
   infile cards missover;
   input Student $ Q1-Q4;
   cards;
A            1   3   2    3
B            2   3   2    2    
C            1   2   1    2
D            3   3   1    2
;

/* Answer key as a lookup table: one row per question name together with
   the value that counts as correct. Rows are listed in question order. */
data answers;
   infile cards missover;
   input question :$8. correct_answer;
   cards;
Q1          1  
Q2          2  
Q3          2  
Q4          2 
;


/* Reshape BROAD_DATA from one row per student to one row per
   (student, question), then sort by question so the answer key can be
   merged on afterwards. */
data long_data;
    set broad_data;

    /* QUESTION holds a variable name returned by VNAME, so it must be
       character; declaring it numeric would break both the assignment and
       the later BY-merge with ANSWERS. $8 matches the default length that
       list input gives ANSWERS.question. */
    length question $ 8 answer 8;

    /* Q1--Q4 is a positional range: every variable from Q1 through Q4 in
       the order they appear in BROAD_DATA. */
    array long[*] Q1--Q4;

    do i = 1 to dim(long);
        question    = vname(long[i]);  /* e.g. "Q1" */
        answer      = long[i];
        output;                        /* one output row per question */
    end;
    keep Student question answer;
run;
proc sort data = long_data; by question student; run;

/* Attach the correct answer to every student response by match-merging
   on QUESTION (both inputs must already be in QUESTION order), then
   re-sort by student for the per-student summary. */
data long_data_answers;
    merge long_data answers;
    by question;
run;

proc sort data = long_data_answers;
    by student;
run;

/* Collapse LONG_DATA_ANSWERS (sorted by student) to one row per student
   holding the share of correctly answered questions, shown as a fraction. */
data result;
    set long_data_answers;
    by student;

    /* Start fresh tallies at the first row of each student. */
    if first.student then do;
        n_correct = 0;
        n_asked   = 0;
    end;

    /* Sum statements retain their targets across rows; the comparison
       contributes 1 when the answer matches the key and 0 otherwise. */
    n_correct + (answer eq correct_answer);
    n_asked   + 1;

    /* Emit a row only once the student's last response has been counted. */
    if last.student;

    result = n_correct / n_asked;
    keep student result;
    format result fract8.;
run;

如果您喜欢 SQL,或者想压缩代码,可以将最后两个数据步骤和排序合并到一个语句中。

/* Join each response to its answer-key row and compute, per student,
   the fraction of responses that match the key. */
proc sql;
    create table result as
    select a.student,
           sum(a.answer = b.correct_answer) / count(*) as result format = fract8.
    from long_data as a, answers as b
    where a.question = b.question
    group by a.student;
quit;

您可以创建一个包含正确答案的数组,然后循环遍历学生的答案来比较它们。

我把最终变量创建为字符型,以便按您给出的格式显示。显然,这意味着您无法直接访问其底层数值,因此您可能希望在数据中保留答对题数,以用于其他分析。

/* Sample input: one row per student with the numeric answers to Q1-Q4. */
data have;
    input Student $ Q1-Q4;
    cards;
A 1 3 2 3
B 2 3 2 2
C 1 2 1 2
D 3 3 1 2
;
run;

/* Score each student in HAVE against an answer key held in an array and
   store the result as "n/total" text in ANSWERED_CORRECT.
   Output WANT keeps only Student and ANSWERED_CORRECT. */
data want;
set have;
length answered_correct $ 8; /* character result, e.g. "2/4"; declared before first use */
/* Answer key in question order: Q1 -> 1, Q2-Q4 -> 2.
   _TEMPORARY_ keeps the key out of the output data set, so nothing
   extra has to be dropped. */
array correct{4} _temporary_ (1 2 2 2);
array answer{4} q1-q4; /* the student's answers, same order as the key */
_count=0; /* reset for every student: the sum statement below retains _count */
do i = 1 to dim(correct);
    if answer{i} = correct{i} then _count+1; /* one point per matching answer */
end;
answered_correct = catx('/',_count,dim(correct)); /* "<correct>/<number of questions>" */
drop q: i _count; /* drop the raw answers and work variables */
run;