在for循环内的for循环中求和数据帧值
Sum dataframe values in for loop inside a for loop
我有一个 large polygon 文件、一个 small polygon 文件和一个 points 文件。我在这里所做的是循环遍历大多边形,找出与之相交的小多边形;然后计算大多边形内每个小多边形的面积;接着遍历这些小多边形,得到每个小多边形内的点统计信息。
我在每个小多边形中得到了 number_of_somethin 值。问题是:如何把一个大多边形内所有小多边形的 number_of_somethin 值相加,并将结果作为新列(比方说 large_polygon['smth_sum'])存储到原始的 large_polygon 文件中?
使用 df_res_2.loc[idx, 'smth'] = number_of_somethin,我可以得到大多边形内每个小多边形的 number_of_somethin 值。现在我需要把它们求和并写入 large_polygon['smth_sum']。
注意:FID 是大多边形的 id,ID 是小多边形的 id。
import geopandas as gpd
# Load the three layers (large polygons, small polygons, points) from shapefiles.
small_polygon = gpd.read_file(r'R:\...\small.shp')
large_polygon = gpd.read_file(r'R:\...\large.shp')
points = gpd.read_file(r'R:\...\points.shp')

# Spatially join small polygons to large polygons, keeping only the columns used below.
SmallJoin = gpd.sjoin(small_polygon, large_polygon)[['FID', 'ID', 'someValue', 'geometry']]

# Outer loop: one pass per large polygon.
for i in large_polygon.index:
    # Small polygons whose FID equals the current large-polygon index value.
    df_i = SmallJoin[SmallJoin['FID'] == i]
    # I do something here, e.g. calculate the small polygon area
    df_res = gpd.overlay(large_polygon, df_i, how='intersection')
    df_res['area'] = round((df_res.apply(lambda row: row.geometry.area, axis=1)), 4)
    # now I know the area of each small polygon within the large polygon
    df_res_2 = df_res[df_res['FID_1'] == i]
    # now point statistics in small polygons
    PointsJoin = gpd.sjoin(points, df_res)[['ID', 'someAttribute', 'someAttribute2', 'geometry']]
    # Inner loop: one pass per small polygon inside the current large polygon.
    for idx, val in df_res_2['ID'].items():
        df_idx = PointsJoin[PointsJoin['ID'] == val]
        # NOTE(review): this is column arithmetic, so the result is a Series
        # with one entry per joined point, not a single number.
        number_of_somethin = df_idx['someAttribute'] + 121 + df_idx['someAttribute2']
        # NOTE(review): df_res_2 is a filtered selection of df_res; writing into
        # it may trigger pandas' SettingWithCopyWarning - confirm intent.
        df_res_2.loc[idx, 'smth'] = number_of_somethin
我有一些关于如何做到这一点的想法,但都没有奏效,所以我认为应该还有其他方法。
# The three lines below are alternative attempts from the question (reported
# there as not working); they are not meant to run one after another.
# Attempt 1: the right-hand side is the whole 'smth' column, the target is one cell.
large_polygon.loc[i, 'smth_sum'] = df_res_2['smth']
# Attempt 2: reduce the 'smth' column to its sum and write it to row i.
large_polygon.loc[i, 'smth_sum'] = df_res_2['smth'].sum()
# Attempt 3: the right-hand side is a row-filtered large_polygon (rows whose
# 'FID' equals the sum of df_res_2['FID_1']), assigned as a column.
large_polygon['smth_sum'] = large_polygon[large_polygon['FID'] == df_res_2['FID_1'].sum()]
你描述了三个 GeoDataFrame:
- 大(large)- 这里使用了国家/地区边界
- 小(small)- 这里使用了 UTM 区域边界
- 点(points)- 这里使用了随机生成的点,它们大部分与第 2 项(小多边形)重叠
你希望为每个大几何体(此处为国家/地区)得到两个输出:
- area(面积)- 各个小几何体与该大几何体相交部分的面积之和
- value - 位于与该大几何体空间连接的小几何体内的点的 value 之和
以上所有都可以通过空间连接以及 pandas 的 merge() 和 groupby() 来实现。
为了把这一点说明得更清楚,下面还包含了一种将所有这些可视化的方法。
import geopandas as gpd
import shapely.geometry
import requests
import numpy as np
import plotly.express as px
# Sample data: UTM zone polygons fetched as GeoJSON (source of the "small"
# layer) and the Natural Earth low-resolution countries (source of the "large" layer).
# NOTE(review): gpd.datasets may be unavailable in newer GeoPandas releases -
# confirm against the installed version.
# fmt: off
utm_geojson = requests.get("https://opendata.arcgis.com/datasets/b294795270aa4fb3bd25286bf09edc51_0.geojson").json()
gdf_utm = gpd.GeoDataFrame.from_features(utm_geojson).set_crs("EPSG:4326")
gdf_countries = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
# "Large" layer: the five countries selected by their iso_a3 code.
iso_codes = ["BEL", "LUX", "NLD", "DEU", "AUT"]
large_polygon = gdf_countries.loc[gdf_countries["iso_a3"].isin(iso_codes)]
# large_polygon.boundary.plot()
# "Small" layer: UTM rows that join to a selected country, reduced to the
# original UTM columns and de-duplicated on (FID, ZONE).
utm_matches = gpd.sjoin(gdf_utm, large_polygon).loc[:, gdf_utm.columns]
small_polygon = utm_matches.groupby(["FID", "ZONE"]).first().reset_index()
# fmt: on
# Scatter POINTS random points uniformly over the bounding box of small_polygon;
# each point carries an integer "value" 0..POINTS-1.
b = small_polygon.total_bounds
POINTS = 10
# x coordinates are drawn before y coordinates, from bounds entries 0/2 and 1/3.
xs = np.random.uniform(b[0], b[2], POINTS)
ys = np.random.uniform(b[1], b[3], POINTS)
points = gpd.GeoDataFrame(
    data={"value": np.arange(POINTS)},
    geometry=[shapely.geometry.Point(x, y) for x, y in zip(xs, ys)],
    crs="EPSG:4326",
)
# Pair every small polygon with each large polygon it spatially joins to, and
# carry the large polygon's geometry along as "geometry_large".
SmallJoin = gpd.sjoin(small_polygon, large_polygon).merge(
    large_polygon["geometry"],
    left_on="index_right",
    right_index=True,
    suffixes=("", "_large"),
)
# Area of the part of each small polygon that lies inside its large polygon.
# NOTE(review): the layers appear to be in EPSG:4326, so this area is in
# squared degrees; reproject to an equal-area CRS if real areas are needed.
SmallJoin["area"] = SmallJoin.intersection(
    gpd.GeoSeries(SmallJoin["geometry_large"])
).area

pairs = SmallJoin.rename(columns={"index_right": "index_large"})

# Sum overlap areas once per (small, large) pair. Summing after the join to
# points would repeat a pair's area for every point it matches and would drop
# pairs that match no point at all.
per_large = pairs.groupby("index_large").agg(
    {"area": "sum", "geometry_large": "first"}
)

# Sum point values per large polygon, via the small polygons joined to it.
point_totals = pairs.sjoin(points).groupby("index_large")["value"].sum()

# Final: one row per large polygon (index "index_large") with columns
# area, value, geometry_large. Large polygons without any point get value 0.
Final = per_large.assign(
    value=point_totals.reindex(per_large.index, fill_value=0)
)[["area", "value", "geometry_large"]]
输出(点是随机生成的,每次运行的结果可能不同)
index_large | area | value |
---|---|---|
114 | 24.6382 | 25 |
121 | 90.3565 | 45 |
128 | 0.603031 | 20 |
129 | 7.65999 | 20 |
130 | 10.5284 | 20 |
可视化
# Marker layer for the points, moved onto a second colour axis so it does not
# share the choropleth's colour scale.
point_traces = (
    px.scatter_mapbox(
        points,
        lat=points.geometry.y,
        lon=points.geometry.x,
        color="value",
    )
    .update_traces(marker_coloraxis="coloraxis2", marker_size=10)
    .data
)
# Centre the map on the midpoint of the bounds array `b`.
map_center = {"lon": sum(b[[0, 2]]) / 2, "lat": sum(b[[1, 3]]) / 2}
# Line layer drawn from the small polygons.
outline_layer = {"source": small_polygon.__geo_interface__, "type": "line"}

# Choropleth of the large polygons coloured by "value" with "area" on hover;
# the trailing expression is the finished figure.
px.choropleth_mapbox(
    Final,
    geojson=gpd.GeoSeries(Final["geometry_large"]),
    locations=Final.index,
    color="value",
    hover_data=["area"],
).add_traces(point_traces).update_layout(
    mapbox={
        "style": "carto-positron",
        "center": map_center,
        "zoom": 3,
        "layers": [outline_layer],
    },
    coloraxis2={
        "colorbar": {"x": -0.1, "title": "scatter"},
        "colorscale": [[0, "blue"], [1, "blue"]],
    },
    coloraxis={"colorscale": [[0, "white"], [1, "green"]]},
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)
我有一个 large polygon 文件、一个 small polygon 文件和一个 points 文件。我在这里所做的是循环遍历大多边形,找出与之相交的小多边形;然后计算大多边形内每个小多边形的面积;接着遍历这些小多边形,得到每个小多边形内的点统计信息。
我在每个小多边形中得到了 number_of_somethin 值。问题是:如何把一个大多边形内所有小多边形的 number_of_somethin 值相加,并将结果作为新列(比方说 large_polygon['smth_sum'])存储到原始的 large_polygon 文件中?
使用 df_res_2.loc[idx, 'smth'] = number_of_somethin,我可以得到大多边形内每个小多边形的 number_of_somethin 值。现在我需要把它们求和并写入 large_polygon['smth_sum']。
注意:FID 是大多边形的 id,ID 是小多边形的 id。
import geopandas as gpd
# Load the three layers (large polygons, small polygons, points) from shapefiles.
small_polygon = gpd.read_file(r'R:\...\small.shp')
large_polygon = gpd.read_file(r'R:\...\large.shp')
points = gpd.read_file(r'R:\...\points.shp')

# Spatially join small polygons to large polygons, keeping only the columns used below.
SmallJoin = gpd.sjoin(small_polygon, large_polygon)[['FID', 'ID', 'someValue', 'geometry']]

# Outer loop: one pass per large polygon.
for i in large_polygon.index:
    # Small polygons whose FID equals the current large-polygon index value.
    df_i = SmallJoin[SmallJoin['FID'] == i]
    # I do something here, e.g. calculate the small polygon area
    df_res = gpd.overlay(large_polygon, df_i, how='intersection')
    df_res['area'] = round((df_res.apply(lambda row: row.geometry.area, axis=1)), 4)
    # now I know the area of each small polygon within the large polygon
    df_res_2 = df_res[df_res['FID_1'] == i]
    # now point statistics in small polygons
    PointsJoin = gpd.sjoin(points, df_res)[['ID', 'someAttribute', 'someAttribute2', 'geometry']]
    # Inner loop: one pass per small polygon inside the current large polygon.
    for idx, val in df_res_2['ID'].items():
        df_idx = PointsJoin[PointsJoin['ID'] == val]
        # NOTE(review): this is column arithmetic, so the result is a Series
        # with one entry per joined point, not a single number.
        number_of_somethin = df_idx['someAttribute'] + 121 + df_idx['someAttribute2']
        # NOTE(review): df_res_2 is a filtered selection of df_res; writing into
        # it may trigger pandas' SettingWithCopyWarning - confirm intent.
        df_res_2.loc[idx, 'smth'] = number_of_somethin
我有一些关于如何做到这一点的想法,但都没有奏效,所以我认为应该还有其他方法。
# The three lines below are alternative attempts from the question (reported
# there as not working); they are not meant to run one after another.
# Attempt 1: the right-hand side is the whole 'smth' column, the target is one cell.
large_polygon.loc[i, 'smth_sum'] = df_res_2['smth']
# Attempt 2: reduce the 'smth' column to its sum and write it to row i.
large_polygon.loc[i, 'smth_sum'] = df_res_2['smth'].sum()
# Attempt 3: the right-hand side is a row-filtered large_polygon (rows whose
# 'FID' equals the sum of df_res_2['FID_1']), assigned as a column.
large_polygon['smth_sum'] = large_polygon[large_polygon['FID'] == df_res_2['FID_1'].sum()]
你描述了三个 GeoDataFrame:
- 大(large)- 这里使用了国家/地区边界
- 小(small)- 这里使用了 UTM 区域边界
- 点(points)- 这里使用了随机生成的点,它们大部分与第 2 项(小多边形)重叠
你希望为每个大几何体(此处为国家/地区)得到两个输出:
- area(面积)- 各个小几何体与该大几何体相交部分的面积之和
- value - 位于与该大几何体空间连接的小几何体内的点的 value 之和
以上所有都可以通过空间连接以及 pandas 的 merge() 和 groupby() 来实现。
为了把这一点说明得更清楚,下面还包含了一种将所有这些可视化的方法。
import geopandas as gpd
import shapely.geometry
import requests
import numpy as np
import plotly.express as px
# Sample data: UTM zone polygons fetched as GeoJSON (source of the "small"
# layer) and the Natural Earth low-resolution countries (source of the "large" layer).
# NOTE(review): gpd.datasets may be unavailable in newer GeoPandas releases -
# confirm against the installed version.
# fmt: off
utm_geojson = requests.get("https://opendata.arcgis.com/datasets/b294795270aa4fb3bd25286bf09edc51_0.geojson").json()
gdf_utm = gpd.GeoDataFrame.from_features(utm_geojson).set_crs("EPSG:4326")
gdf_countries = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
# "Large" layer: the five countries selected by their iso_a3 code.
iso_codes = ["BEL", "LUX", "NLD", "DEU", "AUT"]
large_polygon = gdf_countries.loc[gdf_countries["iso_a3"].isin(iso_codes)]
# large_polygon.boundary.plot()
# "Small" layer: UTM rows that join to a selected country, reduced to the
# original UTM columns and de-duplicated on (FID, ZONE).
utm_matches = gpd.sjoin(gdf_utm, large_polygon).loc[:, gdf_utm.columns]
small_polygon = utm_matches.groupby(["FID", "ZONE"]).first().reset_index()
# fmt: on
# Scatter POINTS random points uniformly over the bounding box of small_polygon;
# each point carries an integer "value" 0..POINTS-1.
b = small_polygon.total_bounds
POINTS = 10
# x coordinates are drawn before y coordinates, from bounds entries 0/2 and 1/3.
xs = np.random.uniform(b[0], b[2], POINTS)
ys = np.random.uniform(b[1], b[3], POINTS)
points = gpd.GeoDataFrame(
    data={"value": np.arange(POINTS)},
    geometry=[shapely.geometry.Point(x, y) for x, y in zip(xs, ys)],
    crs="EPSG:4326",
)
# Pair every small polygon with each large polygon it spatially joins to, and
# carry the large polygon's geometry along as "geometry_large".
SmallJoin = gpd.sjoin(small_polygon, large_polygon).merge(
    large_polygon["geometry"],
    left_on="index_right",
    right_index=True,
    suffixes=("", "_large"),
)
# Area of the part of each small polygon that lies inside its large polygon.
# NOTE(review): the layers appear to be in EPSG:4326, so this area is in
# squared degrees; reproject to an equal-area CRS if real areas are needed.
SmallJoin["area"] = SmallJoin.intersection(
    gpd.GeoSeries(SmallJoin["geometry_large"])
).area

pairs = SmallJoin.rename(columns={"index_right": "index_large"})

# Sum overlap areas once per (small, large) pair. Summing after the join to
# points would repeat a pair's area for every point it matches and would drop
# pairs that match no point at all.
per_large = pairs.groupby("index_large").agg(
    {"area": "sum", "geometry_large": "first"}
)

# Sum point values per large polygon, via the small polygons joined to it.
point_totals = pairs.sjoin(points).groupby("index_large")["value"].sum()

# Final: one row per large polygon (index "index_large") with columns
# area, value, geometry_large. Large polygons without any point get value 0.
Final = per_large.assign(
    value=point_totals.reindex(per_large.index, fill_value=0)
)[["area", "value", "geometry_large"]]
输出(点是随机生成的,每次运行的结果可能不同)
index_large | area | value |
---|---|---|
114 | 24.6382 | 25 |
121 | 90.3565 | 45 |
128 | 0.603031 | 20 |
129 | 7.65999 | 20 |
130 | 10.5284 | 20 |
可视化
# Marker layer for the points, moved onto a second colour axis so it does not
# share the choropleth's colour scale.
point_traces = (
    px.scatter_mapbox(
        points,
        lat=points.geometry.y,
        lon=points.geometry.x,
        color="value",
    )
    .update_traces(marker_coloraxis="coloraxis2", marker_size=10)
    .data
)
# Centre the map on the midpoint of the bounds array `b`.
map_center = {"lon": sum(b[[0, 2]]) / 2, "lat": sum(b[[1, 3]]) / 2}
# Line layer drawn from the small polygons.
outline_layer = {"source": small_polygon.__geo_interface__, "type": "line"}

# Choropleth of the large polygons coloured by "value" with "area" on hover;
# the trailing expression is the finished figure.
px.choropleth_mapbox(
    Final,
    geojson=gpd.GeoSeries(Final["geometry_large"]),
    locations=Final.index,
    color="value",
    hover_data=["area"],
).add_traces(point_traces).update_layout(
    mapbox={
        "style": "carto-positron",
        "center": map_center,
        "zoom": 3,
        "layers": [outline_layer],
    },
    coloraxis2={
        "colorbar": {"x": -0.1, "title": "scatter"},
        "colorscale": [[0, "blue"], [1, "blue"]],
    },
    coloraxis={"colorscale": [[0, "white"], [1, "green"]]},
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)