R - 切换因子中的水平和值
R - Switch levels and values in a factor
我正在尝试将一个因子转换为数字数据类型,但注意到我的值和水平被交换了,所以当我转换时,我只得到每个值的计数而不是值本身。
我试过 as.numeric(as.character(data$column)),但遇到了 NA。
str(data$column)
产生输出:Factor w/ 394 levels "x1","x2","x3",...: y y x y ...
而实际上水平应该代替值。
dput(head(train$Sqft, 100))
structure(c(13L, 13L, 17L, 13L, 13L, 12L, 62L, 17L, 13L, 17L,
29L, 13L, 4L, 17L, 39L, 29L, 17L, 38L, 17L, 12L, 17L, 39L, 39L,
96L, 39L, 39L, 82L, 30L, 29L, 216L, 96L, 25L, 169L, 195L, 195L,
169L, 169L, 168L, 170L, 167L, 167L, 194L, 167L, 195L, 169L, 169L,
168L, 195L, 169L, 169L, 196L, 169L, 191L, 168L, 169L, 196L, 196L,
169L, 195L, 168L, 169L, 169L, 169L, 195L, 197L, 169L, 169L, 169L,
169L, 195L, 170L, 173L, 173L, 173L, 173L, 197L, 202L, 173L, 178L,
271L, 265L, 271L, 271L, 265L, 265L, 265L, 265L, 271L, 166L, 33L,
166L, 33L, 166L, 33L, 166L, 33L, 166L, 33L, 166L, 33L), .Label = c("1,072",
"1,098", "1,168", "1,200", "1,236", "1,280", "1,288", "1,294",
"1,301", "1,314", "1,317", "1,320", "1,360", "1,364", "1,368",
"1,375", "1,380", "1,382", "1,394", "1,400", "1,408", "1,424",
"1,496", "1,500", "1,507", "1,512", "1,515", "1,522", "1,554",
"1,562", "1,566", "1,568", "1,570", "1,595", "1,596", "1,607",
"1,613", "1,614", "1,630", "1,636", "1,638", "1,642", "1,654",
"1,664", "1,668", "1,680", "1,694", "1,706", "1,712", "1,718",
"1,719", "1,720", "1,722", "1,724", "1,726", "1,731", "1,732",
"1,739", "1,744", "1,748", "1,761", "1,770", "1,800", "1,802",
"1,809", "1,812", "1,816", "1,824", "1,831", "1,832", "1,843",
"1,862", "1,866", "1,882", "1,888", "1,890", "1,894", "1,898",
"1,920", "1,924", "1,930", "1,940", "1,948", "1,953", "1,960",
"1,968", "1,970", "1,972", "1,978", "1,981", "1,987", "1,995",
"1,996", "2,004", "2,014", "2,019", "2,042", "2,047", "2,060",
"2,085", "2,088", "2,089", "2,098", "2,102", "2,116", "2,130",
"2,131", "2,132", "2,134", "2,135", "2,151", "2,153", "2,155",
"2,168", "2,181", "2,184", "2,185", "2,188", "2,196", "2,198",
"2,203", "2,206", "2,225", "2,232", "2,239", "2,249", "2,260",
"2,270", "2,278", "2,286", "2,290", "2,295", "2,305", "2,308",
"2,312", "2,313", "2,321", "2,326", "2,335", "2,336", "2,338",
"2,348", "2,352", "2,360", "2,373", "2,374", "2,384", "2,385",
"2,394", "2,396", "2,400", "2,408", "2,410", "2,436", "2,437",
"2,442", "2,452", "2,464", "2,478", "2,483", "2,489", "2,496",
"2,497", "2,501", "2,503", "2,509", "2,510", "2,512", "2,514",
"2,518", "2,519", "2,527", "2,530", "2,536", "2,556", "2,558",
"2,559", "2,562", "2,582", "2,592", "2,600", "2,604", "2,616",
"2,622", "2,632", "2,635", "2,638", "2,674", "2,682", "2,692",
"2,710", "2,714", "2,727", "2,730", "2,732", "2,734", "2,738",
"2,760", "2,763", "2,772", "2,776", "2,782", "2,786", "2,787",
"2,788", "2,792", "2,798", "2,804", "2,808", "2,813", "2,820",
"2,824", "2,826", "2,836", "2,838", "2,839", "2,841", "2,856",
"2,860", "2,864", "2,874", "2,885", "2,898", "2,902", "2,903",
"2,904", "2,906", "2,932", "2,934", "2,938", "2,940", "2,943",
"2,948", "2,958", "2,959", "2,964", "2,968", "2,982", "2,983",
"2,988", "2,989", "2,992", "2,996", "3,000", "3,004", "3,010",
"3,011", "3,012", "3,014", "3,026", "3,032", "3,040", "3,042",
"3,043", "3,044", "3,050", "3,056", "3,060", "3,064", "3,067",
"3,068", "3,078", "3,082", "3,086", "3,090", "3,108", "3,112",
"3,114", "3,116", "3,124", "3,126", "3,139", "3,147", "3,154",
"3,156", "3,160", "3,161", "3,172", "3,176", "3,178", "3,180",
"3,184", "3,190", "3,196", "3,204", "3,210", "3,214", "3,234",
"3,238", "3,239", "3,253", "3,262", "3,269", "3,272", "3,277",
"3,292", "3,295", "3,302", "3,326", "3,341", "3,342", "3,370",
"3,373", "3,374", "3,404", "3,413", "3,420", "3,422", "3,425",
"3,430", "3,442", "3,443", "3,459", "3,460", "3,462", "3,470",
"3,520", "3,529", "3,536", "3,545", "3,550", "3,551", "3,566",
"3,576", "3,578", "3,594", "3,614", "3,628", "3,630", "3,632",
"3,634", "3,648", "3,663", "3,666", "3,676", "3,678", "3,680",
"3,700", "3,727", "3,738", "3,740", "3,746", "3,765", "3,767",
"3,804", "3,808", "3,837", "3,840", "3,872", "3,889", "3,913",
"3,924", "3,956", "3,991", "4,056", "4,086", "4,116", "4,135",
"4,145", "4,152", "4,155", "4,158", "4,169", "4,231", "4,256",
"4,305", "4,334", "4,358", "4,361", "4,376", "4,432", "4,448",
"4,464", "4,481", "4,500", "4,529", "4,554", "4,568", "4,712",
"4,760", "4,767", "4,802", "4,845", "4,892", "4,928", "5,070",
"5,328", "5,354", "5,386", "5,456", "5,524", "5,536", "5,701",
"6,486"), class = "factor")
# Base R solution: strip the "," grouping mark from the levels, convert them
# to numbers, then index by the factor so that every observation gets its own
# value (converting the levels alone yields only one entry per level).
# `as.is = TRUE` is passed explicitly because type.convert() warns when it is
# omitted for character input (R >= 4.0.0).
type.convert(
  gsub(",", "", levels(train$Sqft), fixed = TRUE),
  as.is = TRUE
)[train$Sqft]
# tidyverse way: parse_number() drops the "," grouping mark while parsing.
# Parse each level once, then index by the factor to get one number per
# observation rather than one per level.
readr::parse_number(levels(train$Sqft))[train$Sqft]
# Replace the first "," in each value with "." (i.e. read the comma as a
# decimal mark), then convert to numeric. sub() already returns character,
# so no separate as.character() step is needed.
# NOTE(review): if the comma is a thousands separator, replace it with ""
# instead of "." -- confirm which reading is intended.
as.numeric(sub(",", ".", x, fixed = TRUE))
# [1] 1.360 1.360 1.380 1.360 1.360 1.320 1.770 1.380 1.360 1.380 1.554
# [12] 1.360 1.200 1.380 1.630 1.554 1.380 1.614 1.380 1.320 1.380 1.630
# [23] 1.630 2.019 1.630 1.630 1.940 1.562 1.554 2.839 2.019 1.507 2.514
# [34] 2.732 2.732 2.514 2.514 2.512 2.518 2.510 2.510 2.730 2.510 2.732
# [45] 2.514 2.514 2.512 2.732 2.514 2.514 2.734 2.514 2.710 2.512 2.514
# [56] 2.734 2.734 2.514 2.732 2.512 2.514 2.514 2.514 2.732 2.738 2.514
# [67] 2.514 2.514 2.514 2.732 2.518 2.530 2.530 2.530 2.530 2.738 2.782
# [78] 2.530 2.562 3.126 3.090 3.126 3.126 3.090 3.090 3.090 3.090 3.126
# [89] 2.509 1.570 2.509 1.570 2.509 1.570 2.509 1.570 2.509 1.570 2.509
# [100] 1.570
或者更高效的写法(只转换水平一次,再用因子本身作为索引取值):
# Same conversion, done once per level instead of once per element: convert
# the levels, then look each element up through its integer code.
as.numeric(sub(",", ".", levels(x), fixed = TRUE))[as.integer(x)]
我正在尝试将一个因子转换为数字数据类型,但注意到我的值和水平被交换了,所以当我转换时,我只得到每个值的计数而不是值本身。
我试过 as.numeric(as.character(data$column)),但遇到了 NA。
str(data$column)
产生输出:Factor w/ 394 levels "x1","x2","x3",...: y y x y ...
而实际上水平应该代替值。
dput(head(train$Sqft, 100))
structure(c(13L, 13L, 17L, 13L, 13L, 12L, 62L, 17L, 13L, 17L,
29L, 13L, 4L, 17L, 39L, 29L, 17L, 38L, 17L, 12L, 17L, 39L, 39L,
96L, 39L, 39L, 82L, 30L, 29L, 216L, 96L, 25L, 169L, 195L, 195L,
169L, 169L, 168L, 170L, 167L, 167L, 194L, 167L, 195L, 169L, 169L,
168L, 195L, 169L, 169L, 196L, 169L, 191L, 168L, 169L, 196L, 196L,
169L, 195L, 168L, 169L, 169L, 169L, 195L, 197L, 169L, 169L, 169L,
169L, 195L, 170L, 173L, 173L, 173L, 173L, 197L, 202L, 173L, 178L,
271L, 265L, 271L, 271L, 265L, 265L, 265L, 265L, 271L, 166L, 33L,
166L, 33L, 166L, 33L, 166L, 33L, 166L, 33L, 166L, 33L), .Label = c("1,072",
"1,098", "1,168", "1,200", "1,236", "1,280", "1,288", "1,294",
"1,301", "1,314", "1,317", "1,320", "1,360", "1,364", "1,368",
"1,375", "1,380", "1,382", "1,394", "1,400", "1,408", "1,424",
"1,496", "1,500", "1,507", "1,512", "1,515", "1,522", "1,554",
"1,562", "1,566", "1,568", "1,570", "1,595", "1,596", "1,607",
"1,613", "1,614", "1,630", "1,636", "1,638", "1,642", "1,654",
"1,664", "1,668", "1,680", "1,694", "1,706", "1,712", "1,718",
"1,719", "1,720", "1,722", "1,724", "1,726", "1,731", "1,732",
"1,739", "1,744", "1,748", "1,761", "1,770", "1,800", "1,802",
"1,809", "1,812", "1,816", "1,824", "1,831", "1,832", "1,843",
"1,862", "1,866", "1,882", "1,888", "1,890", "1,894", "1,898",
"1,920", "1,924", "1,930", "1,940", "1,948", "1,953", "1,960",
"1,968", "1,970", "1,972", "1,978", "1,981", "1,987", "1,995",
"1,996", "2,004", "2,014", "2,019", "2,042", "2,047", "2,060",
"2,085", "2,088", "2,089", "2,098", "2,102", "2,116", "2,130",
"2,131", "2,132", "2,134", "2,135", "2,151", "2,153", "2,155",
"2,168", "2,181", "2,184", "2,185", "2,188", "2,196", "2,198",
"2,203", "2,206", "2,225", "2,232", "2,239", "2,249", "2,260",
"2,270", "2,278", "2,286", "2,290", "2,295", "2,305", "2,308",
"2,312", "2,313", "2,321", "2,326", "2,335", "2,336", "2,338",
"2,348", "2,352", "2,360", "2,373", "2,374", "2,384", "2,385",
"2,394", "2,396", "2,400", "2,408", "2,410", "2,436", "2,437",
"2,442", "2,452", "2,464", "2,478", "2,483", "2,489", "2,496",
"2,497", "2,501", "2,503", "2,509", "2,510", "2,512", "2,514",
"2,518", "2,519", "2,527", "2,530", "2,536", "2,556", "2,558",
"2,559", "2,562", "2,582", "2,592", "2,600", "2,604", "2,616",
"2,622", "2,632", "2,635", "2,638", "2,674", "2,682", "2,692",
"2,710", "2,714", "2,727", "2,730", "2,732", "2,734", "2,738",
"2,760", "2,763", "2,772", "2,776", "2,782", "2,786", "2,787",
"2,788", "2,792", "2,798", "2,804", "2,808", "2,813", "2,820",
"2,824", "2,826", "2,836", "2,838", "2,839", "2,841", "2,856",
"2,860", "2,864", "2,874", "2,885", "2,898", "2,902", "2,903",
"2,904", "2,906", "2,932", "2,934", "2,938", "2,940", "2,943",
"2,948", "2,958", "2,959", "2,964", "2,968", "2,982", "2,983",
"2,988", "2,989", "2,992", "2,996", "3,000", "3,004", "3,010",
"3,011", "3,012", "3,014", "3,026", "3,032", "3,040", "3,042",
"3,043", "3,044", "3,050", "3,056", "3,060", "3,064", "3,067",
"3,068", "3,078", "3,082", "3,086", "3,090", "3,108", "3,112",
"3,114", "3,116", "3,124", "3,126", "3,139", "3,147", "3,154",
"3,156", "3,160", "3,161", "3,172", "3,176", "3,178", "3,180",
"3,184", "3,190", "3,196", "3,204", "3,210", "3,214", "3,234",
"3,238", "3,239", "3,253", "3,262", "3,269", "3,272", "3,277",
"3,292", "3,295", "3,302", "3,326", "3,341", "3,342", "3,370",
"3,373", "3,374", "3,404", "3,413", "3,420", "3,422", "3,425",
"3,430", "3,442", "3,443", "3,459", "3,460", "3,462", "3,470",
"3,520", "3,529", "3,536", "3,545", "3,550", "3,551", "3,566",
"3,576", "3,578", "3,594", "3,614", "3,628", "3,630", "3,632",
"3,634", "3,648", "3,663", "3,666", "3,676", "3,678", "3,680",
"3,700", "3,727", "3,738", "3,740", "3,746", "3,765", "3,767",
"3,804", "3,808", "3,837", "3,840", "3,872", "3,889", "3,913",
"3,924", "3,956", "3,991", "4,056", "4,086", "4,116", "4,135",
"4,145", "4,152", "4,155", "4,158", "4,169", "4,231", "4,256",
"4,305", "4,334", "4,358", "4,361", "4,376", "4,432", "4,448",
"4,464", "4,481", "4,500", "4,529", "4,554", "4,568", "4,712",
"4,760", "4,767", "4,802", "4,845", "4,892", "4,928", "5,070",
"5,328", "5,354", "5,386", "5,456", "5,524", "5,536", "5,701",
"6,486"), class = "factor")
# Base R solution: strip the "," grouping mark from the levels, convert them
# to numbers, then index by the factor so that every observation gets its own
# value (converting the levels alone yields only one entry per level).
# `as.is = TRUE` is passed explicitly because type.convert() warns when it is
# omitted for character input (R >= 4.0.0).
type.convert(
  gsub(",", "", levels(train$Sqft), fixed = TRUE),
  as.is = TRUE
)[train$Sqft]
# tidyverse way: parse_number() drops the "," grouping mark while parsing.
# Parse each level once, then index by the factor to get one number per
# observation rather than one per level.
readr::parse_number(levels(train$Sqft))[train$Sqft]
# Replace the first "," in each value with "." (i.e. read the comma as a
# decimal mark), then convert to numeric. sub() already returns character,
# so no separate as.character() step is needed.
# NOTE(review): if the comma is a thousands separator, replace it with ""
# instead of "." -- confirm which reading is intended.
as.numeric(sub(",", ".", x, fixed = TRUE))
# [1] 1.360 1.360 1.380 1.360 1.360 1.320 1.770 1.380 1.360 1.380 1.554
# [12] 1.360 1.200 1.380 1.630 1.554 1.380 1.614 1.380 1.320 1.380 1.630
# [23] 1.630 2.019 1.630 1.630 1.940 1.562 1.554 2.839 2.019 1.507 2.514
# [34] 2.732 2.732 2.514 2.514 2.512 2.518 2.510 2.510 2.730 2.510 2.732
# [45] 2.514 2.514 2.512 2.732 2.514 2.514 2.734 2.514 2.710 2.512 2.514
# [56] 2.734 2.734 2.514 2.732 2.512 2.514 2.514 2.514 2.732 2.738 2.514
# [67] 2.514 2.514 2.514 2.732 2.518 2.530 2.530 2.530 2.530 2.738 2.782
# [78] 2.530 2.562 3.126 3.090 3.126 3.126 3.090 3.090 3.090 3.090 3.126
# [89] 2.509 1.570 2.509 1.570 2.509 1.570 2.509 1.570 2.509 1.570 2.509
# [100] 1.570
或者更高效的写法(只转换水平一次,再用因子本身作为索引取值):
# Same conversion, done once per level instead of once per element: convert
# the levels, then look each element up through its integer code.
as.numeric(sub(",", ".", levels(x), fixed = TRUE))[as.integer(x)]