数据提取
Extraction of data
我正在尝试从 netcdf 文件中提取 2000-2003 年期间某个区域(经度 -45W 到 10E 和纬度 30-40E)和一个点(纬度=37 度,经度=-40),也适用于整个地理区域。
任务:
- 在 400-450、450-500,500-550,550-600 hpa 的区域和 2000-2003 年期间提取每月 MMR。例如,在 400 hPa 获取(纬度、经度、MMR 作为栅格层)
- 将两个压力水平之间的月度臭氧 MMR 提取为栅格格式。
library(ncdf4)
nc = nc_open("E:/ERA5/ERA_2000_2003.nc")
lat=ncvar_get (nc,"latitude") ; lon=ncvar_get(nc,"longitude")
t = ncvar_get(nc, "time")
pres = ncvar_get(nc, "level")
length(lat); length(lon);length(time);length(pres)
t <- as.POSIXct("1900-01-01 00:00")+as.difftime(t,units="hours")
x <- c(400, 450, 500, 550, 600)
#I want ozone for longitude -45W to 10E and latitude (30-40E) at pressure level 500
#for all years (12months*4 yrs= 48 months)
oz = ncvar_get(nc,'o3',start=c(1,1,1,1),count=c(100,-1,x[3],-1))
lonIdx <- which( lon >= -40 & lon <= 10.00);
latIdx <- which( lat >= 30.00 & lat <= 45.00)
我收到以下错误:
1. Error: cannot allocate vector of size 558.5 Mb
oz = ncvar_get(nc,'o3',start=c(lonIdx,latIdx,1,1),count=c(length(lonIdx),length(latIdx),x[1),-1))
# and sometimes error messages like
2.Error in ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset, :
Error:
variable has 4 dims, but start has 264 entries. They must match!
3. Error in ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset, : Error: variable has 4 dims, but start has 324 entries. They must match!
Traceback:
1. ncvar_get(nc, "o3", start = c(lonIdx, latIdx, 1, 1), count = c(length(lonIdx),
. length(latIdx), -1, -1))
2. ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset,
. scaleFact, start = start, count = count, verbose = verbose,
. signedbyte = signedbyte, collapse_degen = collapse_degen,
. raw_datavals = raw_datavals)
3. stop(paste("Error: variable has", ndims, "dims, but start has",
. length(start), "entries. They must match!"))
任何帮助将不胜感激。
谢谢
从 netcdf 对象中提取感兴趣的子集的两个选项是 1) 使用定义了 start
和 count
的 ncvar_get
函数,或 2) 使用 ncvar_get
来提取整个数组,然后使用标准 base R
数组索引到子集。我不太确定如何计算 MMR,所以我展示了如何提取 o3 值并进行一些基本总结。如果这没有具体完成这两项任务,希望它有足够的提示来帮助您入门。
读入数据
library(ncdf4)
# read file
nc <- nc_open("ERA5_2000_03.nc")
# extract variable levels
lat <- ncvar_get (nc, "latitude")
lon <- ncvar_get(nc, "longitude")
t <- ncvar_get(nc, "time")
t <- as.POSIXct("1900-01-01 00:00") + as.difftime(t, units = "hours")
pres <- ncvar_get(nc, "level")
定义感兴趣区域的索引
lonIdx <- which(lon >= -40 & lon <= 10)
latIdx <- which(lat >= 30 & lat <= 45)
presIdx <- which(pres >= 475 & pres <= 525) # subset to only 500
tIdx <- which(t == t) # i.e., no subset of t
提取所有月份和压力水平o3
# Option 1 -- use ncvar_get()
extract1 <- ncvar_get(nc,
'o3',
start = c(lonIdx[1],latIdx[1], presIdx[1], tIdx[1]),
count = c(length(lonIdx),length(latIdx),length(presIdx), length(tIdx)))
# Option 2 -- subset using array indexing
o3 <- ncvar_get(nc,'o3')
extract2 <- o3[lonIdx, latIdx, presIdx, ]
# Check the outputs are identical
identical(extract1, extract2)
#>TRUE
可选地,您还可以命名数组维度,这有助于使结果直观
# Name the array dimensions
dimnames(o3) <- list('lon' = lon,
'lat' = lat,
'pres' = pres,
'time' = as.character(as.Date(t)))
extract3 <- o3[lonIdx,latIdx, , ]
# Monthly mean for each level of pres across the entire region of interest
apply(extract3, c('pres', 'time'), mean)
# time
# pres 2000-01-01 2000-02-01 2000-03-01
# 400 8.627874e-08 8.303606e-08 9.857230e-08
# 450 7.911534e-08 7.790138e-08 9.043681e-08
# 500 7.421088e-08 7.398450e-08 8.510393e-08
# 550 7.074213e-08 7.102885e-08 8.128490e-08
# 600 6.817185e-08 6.840323e-08 7.853805e-08
# ...
# o3 levels at a specific long/lat
o3['-40', '37', ,]
# time
# pres 2000-01-01 2000-02-01 2000-03-01
# 400 8.160814e-08 8.172732e-08 1.006738e-07
# 450 7.688993e-08 7.681632e-08 9.276743e-08
# 500 7.274836e-08 7.514602e-08 8.791778e-08
# 550 6.989851e-08 7.359140e-08 8.330298e-08
# 600 6.867163e-08 7.110260e-08 8.087377e-08
我正在尝试从 netcdf 文件中提取 2000-2003 年期间某个区域(经度 -45W 到 10E 和纬度 30-40E)和一个点(纬度=37 度,经度=-40),也适用于整个地理区域。 任务:
- 在 400-450、450-500,500-550,550-600 hpa 的区域和 2000-2003 年期间提取每月 MMR。例如,在 400 hPa 获取(纬度、经度、MMR 作为栅格层)
- 将两个压力水平之间的月度臭氧 MMR 提取为栅格格式。
library(ncdf4)
nc = nc_open("E:/ERA5/ERA_2000_2003.nc")
lat=ncvar_get (nc,"latitude") ; lon=ncvar_get(nc,"longitude")
t = ncvar_get(nc, "time")
pres = ncvar_get(nc, "level")
length(lat); length(lon);length(time);length(pres)
t <- as.POSIXct("1900-01-01 00:00")+as.difftime(t,units="hours")
x <- c(400, 450, 500, 550, 600)
#I want ozone for longitude -45W to 10E and latitude (30-40E) at pressure level 500
#for all years (12months*4 yrs= 48 months)
oz = ncvar_get(nc,'o3',start=c(1,1,1,1),count=c(100,-1,x[3],-1))
lonIdx <- which( lon >= -40 & lon <= 10.00);
latIdx <- which( lat >= 30.00 & lat <= 45.00)
我收到以下错误:
1. Error: cannot allocate vector of size 558.5 Mb
oz = ncvar_get(nc,'o3',start=c(lonIdx,latIdx,1,1),count=c(length(lonIdx),length(latIdx),x[1),-1))
# and sometimes error messages like
2.Error in ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset, :
Error:
variable has 4 dims, but start has 264 entries. They must match!
3. Error in ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset, : Error: variable has 4 dims, but start has 324 entries. They must match!
Traceback:
1. ncvar_get(nc, "o3", start = c(lonIdx, latIdx, 1, 1), count = c(length(lonIdx),
. length(latIdx), -1, -1))
2. ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset,
. scaleFact, start = start, count = count, verbose = verbose,
. signedbyte = signedbyte, collapse_degen = collapse_degen,
. raw_datavals = raw_datavals)
3. stop(paste("Error: variable has", ndims, "dims, but start has",
. length(start), "entries. They must match!"))
任何帮助将不胜感激。
谢谢
从 netcdf 对象中提取感兴趣的子集的两个选项是 1) 使用定义了 start
和 count
的 ncvar_get
函数,或 2) 使用 ncvar_get
来提取整个数组,然后使用标准 base R
数组索引到子集。我不太确定如何计算 MMR,所以我展示了如何提取 o3 值并进行一些基本总结。如果这没有具体完成这两项任务,希望它有足够的提示来帮助您入门。
读入数据
library(ncdf4)
# read file
nc <- nc_open("ERA5_2000_03.nc")
# extract variable levels
lat <- ncvar_get (nc, "latitude")
lon <- ncvar_get(nc, "longitude")
t <- ncvar_get(nc, "time")
t <- as.POSIXct("1900-01-01 00:00") + as.difftime(t, units = "hours")
pres <- ncvar_get(nc, "level")
定义感兴趣区域的索引
lonIdx <- which(lon >= -40 & lon <= 10)
latIdx <- which(lat >= 30 & lat <= 45)
presIdx <- which(pres >= 475 & pres <= 525) # subset to only 500
tIdx <- which(t == t) # i.e., no subset of t
提取所有月份和压力水平o3
# Option 1 -- use ncvar_get()
extract1 <- ncvar_get(nc,
'o3',
start = c(lonIdx[1],latIdx[1], presIdx[1], tIdx[1]),
count = c(length(lonIdx),length(latIdx),length(presIdx), length(tIdx)))
# Option 2 -- subset using array indexing
o3 <- ncvar_get(nc,'o3')
extract2 <- o3[lonIdx, latIdx, presIdx, ]
# Check the outputs are identical
identical(extract1, extract2)
#>TRUE
可选地,您还可以命名数组维度,这有助于使结果直观
# Name the array dimensions
dimnames(o3) <- list('lon' = lon,
'lat' = lat,
'pres' = pres,
'time' = as.character(as.Date(t)))
extract3 <- o3[lonIdx,latIdx, , ]
# Monthly mean for each level of pres across the entire region of interest
apply(extract3, c('pres', 'time'), mean)
# time
# pres 2000-01-01 2000-02-01 2000-03-01
# 400 8.627874e-08 8.303606e-08 9.857230e-08
# 450 7.911534e-08 7.790138e-08 9.043681e-08
# 500 7.421088e-08 7.398450e-08 8.510393e-08
# 550 7.074213e-08 7.102885e-08 8.128490e-08
# 600 6.817185e-08 6.840323e-08 7.853805e-08
# ...
# o3 levels at a specific long/lat
o3['-40', '37', ,]
# time
# pres 2000-01-01 2000-02-01 2000-03-01
# 400 8.160814e-08 8.172732e-08 1.006738e-07
# 450 7.688993e-08 7.681632e-08 9.276743e-08
# 500 7.274836e-08 7.514602e-08 8.791778e-08
# 550 6.989851e-08 7.359140e-08 8.330298e-08
# 600 6.867163e-08 7.110260e-08 8.087377e-08