如何在 R 中对我的数据集应用 prop.test?

How do I apply prop.test to my dataset in R?

我正在分析 Aids2 数据集,想用 "prop.test" 比较男性与女性中通过 "hs" 途径感染的比例。我该怎么做?

这是我的数据集的一部分:

  state sex  diag death status T.categ age
1      NSW   M 10905 11081      D      hs  35
2      NSW   M 11029 11096      D      hs  53
3      NSW   M  9551  9983      D      hs  42
4      NSW   M  9577  9654      D    haem  44
5      NSW   M 10015 10290      D      hs  39
6      NSW   M  9971 10344      D      hs  36
7      NSW   M 10746 11135      D   other  36
8      NSW   M 10042 11069      D      hs  31
9      NSW   M 10464 10956      D      hs  26
10     NSW   M 10439 10873      D    hsid  27
11     NSW   M 10416 10432      D      hs  45
12     NSW   M 10216 10524      D      hs  36
13     NSW   M 10385 10477      D      hs  27
14     NSW   M 10366 10631      D      hs  35
15     NSW   M 10452 11504      A      hs  30
16     NSW   M 10552 10684      D      hs  39
17     NSW   M 10673 11200      D      hs  30
18     NSW   M 10923 11504      A    haem  21
19     NSW   M 10993 11504      A      hs  56
20     NSW   M 11020 11171      D      hs  41
21     NSW   M 10805 10877      D      hs  28
22     NSW   M 10996 11504      A      hs  38
23     NSW   M 10738 11504      A     het  26
24     NSW   M 11063 11504      A      id  39
25     NSW   M 10885 11196      D      hs  46
26     NSW   M 11056 11504      A    haem  13
27     NSW   M 11283 11504      A      hs  34
28     NSW   M 11195 11504      A     het  39
29     NSW   M 10848 11504      A      hs  31
30     NSW   M 11289 11504      A  mother   1
31     NSW   F 10961 11504      A      id  30
32     NSW   M 11311 11312      D   blood  37
33     NSW   M 11337 11504      A      hs  38
34     NSW   M 11458 11463      D      hs  33
35     NSW   M 11480 11504      A      hs  30
36     NSW   M 11462 11504      A      hs  40
37     NSW   M  8302  8469      D      hs  51
38     NSW   M  8711  8850      D      hs  29
39     NSW   M  8726  9254      D    hsid  29
40     NSW   M  8760  8959      D    hsid  37
41     NSW   M  8802  8879      D      hs  46
42     NSW   M  8877  9180      D      hs  37
43     NSW   M  9011  9696      D   blood  54
44     NSW   M  8990  9175      D      hs  30
45     NSW   M  9063  9172      D   blood  25
46     NSW   M  9003  9109      D    hsid  26
47     NSW   M  9022  9218      D      hs  41
48     NSW   M  8985  9254      D      hs  41
49     NSW   M  9030  9781      D      hs  27
50     NSW   M  9086  9314      D      hs  35
51     NSW   M  9015  9943      D      hs  35
52     NSW   M  9009  9350      D      hs  25
53     NSW   M  8970  9240      D      hs  34
54     NSW   M  9171  9309      D      hs  35
55     NSW   M  9087  9598      D      hs  33
56     NSW   M  9115  9686      D      hs  31
57     NSW   M  9065  9262      D      hs  43
58     NSW   M  9104  9126      D      hs  59
59     NSW   M  9028  9532      D      hs  31
60     NSW   M  9101  9268      D      hs  41
61     NSW   M  9096  9226      D      hs  34
62     NSW   M  9128  9660      D      hs  37
63     NSW   M  9125  9207      D      hs  31
64     NSW   M  9083  9682      D      hs  37
65     NSW   M  9150  9285      D      hs  38
66     NSW   F  9014  9152      D   blood  44
67     NSW   M  9157  9962      D      hs  41
68     NSW   M  9098  9418      D      hs  41
69     NSW   M  8913  9082      D      hs  32
70     NSW   M  9141  9222      D      hs  40
71     NSW   M  9158  9920      D      hs  23
72     NSW   M  9167 10461      D      hs  42
73     NSW   M  9244  9379      D      hs  33
74     NSW   M  9138  9565      D      hs  47
75     NSW   M  9222  9536      D      hs  52
76     NSW   M  9272  9290      D      hs  35
77     NSW   M  9131  9392      D      hs  38
78     NSW   M  9236 10013      D      hs  23
79     NSW   M  9145  9250      D      hs  45
80     NSW   M  8964  9300      D    haem  48
81     NSW   M  9207  9768      D      hs  32
82     NSW   M  9240  9447      D      hs  38
83     NSW   M  9281  9723      D      hs  25
84     NSW   M  9300  9736      D      hs  36
85     NSW   M  9294 10070      D      hs  39
86     NSW   F  9258  9259      D   blood  25
87     NSW   M  9145  9436      D      hs  33
88     NSW   M  9310  9533      D      hs  35
89     NSW   M  9344 11320      D      hs  49
90     NSW   M  9185  9214      D      hs  38
91     NSW   M  9247  9549      D      hs  30
92     NSW   M  9201  9315      D      hs  44
93     NSW   F  9349  9392      D   blood  55
94     NSW   M  9246  9956      D      hs  31
95     NSW   M  9273 10018      D      hs  32
96     NSW   M  9241  9576      D      hs  29
97     NSW   M  9264  9451      D      hs  42
98     NSW   M  9310  9730      D      hs  28

你能帮帮我吗?我是数据分析的初学者,不知道如何应用这种类型的检验("prop.test")。

预先感谢您的帮助!

我正在使用您的示例数据集:

# Rebuild the sample rows from the question as a data frame.
# The header line names 7 columns while every data row has 8 fields, so
# read.table() takes the first field of each row (the running index) as the
# row names. Strings are kept as character vectors rather than factors.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
df <- read.table(text = "
state sex  diag death status T.categ age
1      NSW   M 10905 11081      D      hs  35
2      NSW   M 11029 11096      D      hs  53
3      NSW   M  9551  9983      D      hs  42
4      NSW   M  9577  9654      D    haem  44
5      NSW   M 10015 10290      D      hs  39
6      NSW   M  9971 10344      D      hs  36
7      NSW   M 10746 11135      D   other  36
8      NSW   M 10042 11069      D      hs  31
9      NSW   M 10464 10956      D      hs  26
10     NSW   M 10439 10873      D    hsid  27
11     NSW   M 10416 10432      D      hs  45
12     NSW   M 10216 10524      D      hs  36
13     NSW   M 10385 10477      D      hs  27
14     NSW   M 10366 10631      D      hs  35
15     NSW   M 10452 11504      A      hs  30
16     NSW   M 10552 10684      D      hs  39
17     NSW   M 10673 11200      D      hs  30
18     NSW   M 10923 11504      A    haem  21
19     NSW   M 10993 11504      A      hs  56
20     NSW   M 11020 11171      D      hs  41
21     NSW   M 10805 10877      D      hs  28
22     NSW   M 10996 11504      A      hs  38
23     NSW   M 10738 11504      A     het  26
24     NSW   M 11063 11504      A      id  39
25     NSW   M 10885 11196      D      hs  46
26     NSW   M 11056 11504      A    haem  13
27     NSW   M 11283 11504      A      hs  34
28     NSW   M 11195 11504      A     het  39
29     NSW   M 10848 11504      A      hs  31
30     NSW   M 11289 11504      A  mother   1
31     NSW   F 10961 11504      A      id  30
32     NSW   M 11311 11312      D   blood  37
33     NSW   M 11337 11504      A      hs  38
34     NSW   M 11458 11463      D      hs  33
35     NSW   M 11480 11504      A      hs  30
36     NSW   M 11462 11504      A      hs  40
37     NSW   M  8302  8469      D      hs  51
38     NSW   M  8711  8850      D      hs  29
39     NSW   M  8726  9254      D    hsid  29
40     NSW   M  8760  8959      D    hsid  37
41     NSW   M  8802  8879      D      hs  46
42     NSW   M  8877  9180      D      hs  37
43     NSW   M  9011  9696      D   blood  54
44     NSW   M  8990  9175      D      hs  30
45     NSW   M  9063  9172      D   blood  25
46     NSW   M  9003  9109      D    hsid  26
47     NSW   M  9022  9218      D      hs  41
48     NSW   M  8985  9254      D      hs  41
49     NSW   M  9030  9781      D      hs  27
50     NSW   M  9086  9314      D      hs  35
51     NSW   M  9015  9943      D      hs  35
52     NSW   M  9009  9350      D      hs  25
53     NSW   M  8970  9240      D      hs  34
54     NSW   M  9171  9309      D      hs  35
55     NSW   M  9087  9598      D      hs  33
56     NSW   M  9115  9686      D      hs  31
57     NSW   M  9065  9262      D      hs  43
58     NSW   M  9104  9126      D      hs  59
59     NSW   M  9028  9532      D      hs  31
60     NSW   M  9101  9268      D      hs  41
61     NSW   M  9096  9226      D      hs  34
62     NSW   M  9128  9660      D      hs  37
63     NSW   M  9125  9207      D      hs  31
64     NSW   M  9083  9682      D      hs  37
65     NSW   M  9150  9285      D      hs  38
66     NSW   F  9014  9152      D   blood  44
67     NSW   M  9157  9962      D      hs  41
68     NSW   M  9098  9418      D      hs  41
69     NSW   M  8913  9082      D      hs  32
70     NSW   M  9141  9222      D      hs  40
71     NSW   M  9158  9920      D      hs  23
72     NSW   M  9167 10461      D      hs  42
73     NSW   M  9244  9379      D      hs  33
74     NSW   M  9138  9565      D      hs  47
75     NSW   M  9222  9536      D      hs  52
76     NSW   M  9272  9290      D      hs  35
77     NSW   M  9131  9392      D      hs  38
78     NSW   M  9236 10013      D      hs  23
79     NSW   M  9145  9250      D      hs  45
80     NSW   M  8964  9300      D    haem  48
81     NSW   M  9207  9768      D      hs  32
82     NSW   M  9240  9447      D      hs  38
83     NSW   M  9281  9723      D      hs  25
84     NSW   M  9300  9736      D      hs  36
85     NSW   M  9294 10070      D      hs  39
86     NSW   F  9258  9259      D   blood  25
87     NSW   M  9145  9436      D      hs  33
88     NSW   M  9310  9533      D      hs  35
89     NSW   M  9344 11320      D      hs  49
90     NSW   M  9185  9214      D      hs  38
91     NSW   M  9247  9549      D      hs  30
92     NSW   M  9201  9315      D      hs  44
93     NSW   F  9349  9392      D   blood  55
94     NSW   M  9246  9956      D      hs  31
95     NSW   M  9273 10018      D      hs  32
96     NSW   M  9241  9576      D      hs  29
97     NSW   M  9264  9451      D      hs  42
98     NSW   M  9310  9730      D      hs  28
", header = TRUE, stringsAsFactors = FALSE)

您可以先创建一个按性别汇总 hs 感染与其他途径感染计数的数据集,然后对其应用 prop.test 函数:

library(dplyr)
library(tidyr)

# Per-sex counts of infections via "hs" versus any other category.
# Both counts are computed directly with summarise(), so the `hs` and `other`
# columns always exist. The earlier spread()-based reshape only created a
# column when that category occurred in the data, which made `hs + other`
# fail with "object not found" on input lacking either one.
# Rows whose T.categ is NA are left out of both counts, and therefore out of
# Total as well.
df_counts <- df %>%
  group_by(sex) %>%
  summarise(
    hs = sum(T.categ == "hs", na.rm = TRUE),    # infected via hs
    other = sum(T.categ != "hs", na.rm = TRUE)  # infected via anything else
  ) %>%
  mutate(Total = hs + other)                    # group size used by prop.test

# Inspect the aggregated counts (exact print formatting depends on the
# installed tibble version).
df_counts

# # A tibble: 2 x 4
#   sex      hs other Total
#   <chr> <int> <int> <int>
# 1 F         0     4     4
# 2 M        78    16    94

# Test whether the proportion infected via hs is the same for both sexes:
# `x` holds the per-sex hs counts and `n` the per-sex group sizes.
# NOTE(review): the F group has only 4 observations in this sample, so the
# chi-squared approximation is probably unreliable here; consider
# fisher.test() on the 2x2 counts instead. Confirm on the full data.
prop.test(x = df_counts$hs, n = df_counts$Total)