数据提取

Extraction of data

我正在尝试从 netcdf 文件中提取 2000-2003 年期间某个区域(经度 -45W 到 10E 和纬度 30-40E)和一个点(纬度=37 度,经度=-40),也适用于整个地理区域。 任务:

  1. 在 400-450、450-500,500-550,550-600 hpa 的区域和 2000-2003 年期间提取每月 MMR。例如,在 400 hPa 获取(纬度、经度、MMR 作为栅格层)
  2. 将两个压力水平之间的月度臭氧 MMR 提取为栅格格式。
library(ncdf4)
nc = nc_open("E:/ERA5/ERA_2000_2003.nc")
lat=ncvar_get (nc,"latitude") ; lon=ncvar_get(nc,"longitude")
t = ncvar_get(nc, "time")
pres = ncvar_get(nc, "level")
length(lat); length(lon);length(time);length(pres)
t <- as.POSIXct("1900-01-01 00:00")+as.difftime(t,units="hours")
x <- c(400, 450, 500, 550, 600) 

#I want ozone for longitude -45W to 10E and latitude (30-40E) at pressure level 500 
#for all years (12months*4 yrs= 48 months) 

oz = ncvar_get(nc,'o3',start=c(1,1,1,1),count=c(100,-1,x[3],-1))
lonIdx <- which( lon >= -40 & lon <= 10.00);
latIdx <- which( lat >= 30.00 & lat <= 45.00)

我收到以下错误:

1. Error: cannot allocate vector of size 558.5 Mb
oz = ncvar_get(nc,'o3',start=c(lonIdx,latIdx,1,1),count=c(length(lonIdx),length(latIdx),x[1),-1))
# and sometimes error messages like
2.Error in ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset,  : 
 Error: 
variable has 4 dims, but start has 264 entries.  They must match!

3. Error in ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset, : Error: variable has 4 dims, but start has 324 entries.  They must match!
Traceback:

1. ncvar_get(nc, "o3", start = c(lonIdx, latIdx, 1, 1), count = c(length(lonIdx), 
 .     length(latIdx), -1, -1))
2. ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset, 
 .     scaleFact, start = start, count = count, verbose = verbose, 
 .     signedbyte = signedbyte, collapse_degen = collapse_degen, 
 .     raw_datavals = raw_datavals)
3. stop(paste("Error: variable has", ndims, "dims, but start has", 
 .     length(start), "entries.  They must match!"))

任何帮助将不胜感激。

谢谢

从 netcdf 对象中提取感兴趣的子集的两个选项是 1) 使用定义了 startcountncvar_get 函数,或 2) 使用 ncvar_get 来提取整个数组,然后使用标准 base R 数组索引到子集。我不太确定如何计算 MMR,所以我展示了如何提取 o3 值并进行一些基本总结。如果这没有具体完成这两项任务,希望它有足够的提示来帮助您入门。

读入数据

library(ncdf4)

# read file 
nc <- nc_open("ERA5_2000_03.nc")

# extract variable levels
lat <- ncvar_get (nc, "latitude")
lon <- ncvar_get(nc, "longitude")
t <- ncvar_get(nc, "time")
t <- as.POSIXct("1900-01-01 00:00") + as.difftime(t, units = "hours")
pres <- ncvar_get(nc, "level")

定义感兴趣区域的索引

lonIdx <- which(lon >= -40 & lon <= 10)
latIdx <- which(lat >= 30 & lat <= 45)
presIdx <- which(pres >= 475 & pres <= 525) # subset to only 500
tIdx <- which(t == t) # i.e., no subset of t

提取所有月份和压力水平o3

# Option 1 -- use ncvar_get()
extract1 <- ncvar_get(nc,
                      'o3',
                      start = c(lonIdx[1],latIdx[1], presIdx[1], tIdx[1]),
                      count = c(length(lonIdx),length(latIdx),length(presIdx), length(tIdx)))


# Option 2 -- subset using array indexing
o3 <- ncvar_get(nc,'o3')
extract2 <- o3[lonIdx, latIdx, presIdx, ]


# Check the outputs are identical
identical(extract1, extract2)
#>TRUE

可选地,您还可以命名数组维度,这有助于使结果直观

# Name the array dimensions
dimnames(o3) <- list('lon' = lon, 
                     'lat' = lat,
                     'pres' = pres,
                     'time' = as.character(as.Date(t)))

extract3 <- o3[lonIdx,latIdx, , ]

# Monthly mean for each level of pres across the entire region of interest
apply(extract3, c('pres', 'time'), mean)
#       time
# pres    2000-01-01   2000-02-01   2000-03-01
#   400 8.627874e-08 8.303606e-08 9.857230e-08
#   450 7.911534e-08 7.790138e-08 9.043681e-08
#   500 7.421088e-08 7.398450e-08 8.510393e-08
#   550 7.074213e-08 7.102885e-08 8.128490e-08
#   600 6.817185e-08 6.840323e-08 7.853805e-08
#   ...

# o3 levels at a specific long/lat
o3['-40', '37', ,]
#       time
# pres    2000-01-01   2000-02-01   2000-03-01
#   400 8.160814e-08 8.172732e-08 1.006738e-07
#   450 7.688993e-08 7.681632e-08 9.276743e-08
#   500 7.274836e-08 7.514602e-08 8.791778e-08
#   550 6.989851e-08 7.359140e-08 8.330298e-08
#   600 6.867163e-08 7.110260e-08 8.087377e-08