在for循环内的for循环中求和数据帧值

Sum dataframe values in for loop inside a for loop

我有一个 large polygon 文件、small polygon 文件和 points 文件。我在这里所做的是循环遍历大多边形以查找哪些小多边形相交。然后计算大多边形内每个小多边形的面积。然后我遍历小多边形以在每个小多边形中找到点统计信息。

我在每个小多边形中找到了 number_of_somethin 值。问题是如何将大多边形内的所有 number_of_somethin 小多边形值相加并将结果作为新列存储在原始 large_polygon 文件中,比方说 large_polygon['smth_sum']?

使用 df_res_2.loc[idx, 'smth'] = number_of_somethin,我在大多边形内的每个小多边形中得到了 number_of_somethin 值。现在我需要把它们求和并存入 large_polygon['smth_sum']。

注意:FID 是大多边形的 id,ID 是小多边形的 id

import geopandas as gpd

# Load the three input layers (small polygons, large polygons, points).
small_polygon = gpd.read_file(r'R:\...\small.shp')
large_polygon = gpd.read_file(r'R:\...\large.shp')
points = gpd.read_file(r'R:\...\points.shp')

# Spatially join small polygons to large polygons and keep only the large id
# (FID), the small id (ID), someValue and the geometry column.
SmallJoin =gpd.sjoin(small_polygon, large_polygon)[['FID', 'ID', 'someValue','geometry']]

# Outer loop: one iteration per large polygon.
# NOTE(review): the filter below compares FID with the index label i, so it
# assumes FID values coincide with large_polygon's index -- confirm.
for i in large_polygon.index:
    # Joined small polygons whose FID equals the current large polygon.
    df_i = SmallJoin[SmallJoin['FID'] == i]

    # Do something here, e.g. calculate the small polygon area:
    # intersect the large polygons with the selected small polygons and store
    # each resulting geometry's area, rounded to 4 decimals.
    df_res = gpd.overlay(large_polygon, df_i, how='intersection')
    df_res['area'] = round((df_res.apply(lambda row: row.geometry.area, axis=1)), 4)

    # Now the area of each small polygon within the large polygon is known;
    # keep only the overlay rows whose FID_1 equals the current large polygon.
    df_res_2 = df_res[df_res['FID_1'] == i]

    # Now the point statistics in the small polygons: join points to the
    # overlay result and keep the small id plus the two point attributes.
    PointsJoin =gpd.sjoin(points, df_res)[['ID','someAttribute', 'someAttribute2','geometry']]

    # Inner loop: one iteration per small polygon id in df_res_2.
    for idx, val in df_res_2['ID'].items():
        # Points that were joined to the small polygon with this ID.
        df_idx = PointsJoin[PointsJoin['ID'] == val]
        # Element-wise combination of two columns, i.e. one entry per point.
        # NOTE(review): this is a Series, not a scalar, yet it is written to a
        # single cell below -- presumably an aggregate (e.g. .sum()) is meant.
        number_of_somethin = df_idx ['someAttribute'] + 121 + df_idx['someAttribute2']
        # NOTE(review): df_res_2 is a filtered selection of df_res, so pandas
        # may emit SettingWithCopyWarning on this assignment.
        df_res_2.loc[idx, 'smth'] = number_of_somethin

我有一些关于如何做到这一点的想法,但都没有奏效,所以我认为应该还有其他方法。

# Attempt 1: writes the whole 'smth' column of df_res_2 into one cell.
# (i and df_res_2 are presumably the variables of the outer loop above.)
large_polygon.loc[i, 'smth_sum'] = df_res_2['smth']
# Attempt 2: writes the sum of the 'smth' column into that cell.
large_polygon.loc[i, 'smth_sum'] = df_res_2['smth'].sum()

# Attempt 3: filters large_polygon on FID == (sum of all FID_1 values) and
# assigns the filtered frame to the 'smth_sum' column.
large_polygon['smth_sum'] = large_polygon[large_polygon['FID'] == df_res_2['FID_1'].sum()]
  • 你描述了三个 GeoDataFrame:

    1. 大(large)- 这里使用了国家/地区边界
    2. 小(small)- 这里使用了 UTM 区域边界
    3. 点(point)- 使用了大部分与 2 重叠的随机生成的点
  • 您定义每个大几何体(此处为国家/地区)需要两个输出

    • area - 每个 small 几何与 large 几何相交部分的面积之和
    • value - 落在与 large 几何空间连接的 small 几何内的 points 的 value 之和
  • 以上所有都可以通过空间连接以及 pandas 的 merge() 和 groupby() 来实现

  • 为了更清楚地说明这一点,还附上了一种将所有这些可视化的方法

import geopandas as gpd
import shapely.geometry
import requests
import numpy as np
import plotly.express as px

# Build sample data for the three layers: large polygons (countries),
# small polygons (UTM zones) and random points.
# fmt: off
# UTM zone boundaries, downloaded as GeoJSON and tagged as EPSG:4326.
gdf_utm = gpd.GeoDataFrame.from_features(requests.get("https://opendata.arcgis.com/datasets/b294795270aa4fb3bd25286bf09edc51_0.geojson").json()).set_crs("EPSG:4326")
# NOTE: gpd.datasets was deprecated in GeoPandas 0.14 and removed in 1.0; on
# newer versions the Natural Earth countries must be loaded from another source.
gdf_countries = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

# Large polygons: a handful of neighbouring countries.
large_polygon = gdf_countries.loc[lambda d: d["iso_a3"].isin(["BEL", "LUX", "NLD", "DEU", "AUT"])]
# large_polygon.boundary.plot()

# Small polygons: UTM zones that intersect a selected country, reduced to the
# original UTM columns with one row per (FID, ZONE).
small_polygon = gpd.sjoin(gdf_utm, large_polygon).loc[:, gdf_utm.columns].groupby(["FID", "ZONE"]).first().reset_index()
# fmt: on

# Random points spread over the bounding box of the small polygons
# (b is [minx, miny, maxx, maxy]); each point carries an integer "value".
b = small_polygon.total_bounds
POINTS = 10
points = gpd.GeoDataFrame(
    geometry=[
        shapely.geometry.Point(x, y)
        for x, y in zip(
            np.random.uniform(*b[[0, 2]], POINTS),
            np.random.uniform(*b[[1, 3]], POINTS),
        )
    ],
    data={"value": np.arange(POINTS)},
    crs="EPSG:4326",
)

# Spatially join small to large and attach the matching large geometry as
# "geometry_large" (looked up through the join's "index_right").
SmallJoin = gpd.sjoin(small_polygon, large_polygon).merge(
    large_polygon["geometry"],
    left_on="index_right",
    right_index=True,
    suffixes=("", "_large"),
)
# Area of the overlap between each small polygon and its large polygon.
# The layers are in EPSG:4326, so this area is in square degrees.
SmallJoin["area"] = SmallJoin.intersection(gpd.GeoSeries(SmallJoin["geometry_large"])).area

# Rename the first join's "index_right" so that it does not clash with the
# "index_right" column produced by the point join below.
small_large = SmallJoin.rename(columns={"index_right": "index_large"})

# Sum the overlap area per large polygon BEFORE joining the points: here there
# is exactly one row per (small, large) pair, so every intersection is counted
# once. Summing after the point join would repeat a pair's area once per
# matched point and lose it completely when the small polygon holds no point.
area_by_large = small_large.groupby("index_large").agg(
    {"area": "sum", "geometry_large": "first"}
)

# Sum the point values per large polygon: points are matched to the small
# polygon geometry, then grouped by the large polygon that small polygon was
# joined to.
value_by_large = small_large.sjoin(points).groupby("index_large")["value"].sum()

# Combine both aggregates. The inner join keeps only large polygons that
# received at least one point, and the column order stays area/value/geometry.
Final = area_by_large.join(value_by_large, how="inner")[
    ["area", "value", "geometry_large"]
]

输出(点是随机生成的,因此具体数值每次运行都会不同)

index_large area value
114 24.6382 25
121 90.3565 45
128 0.603031 20
129 7.65999 20
130 10.5284 20

可视化

# Choropleth of the large polygons, coloured by the summed point value and
# showing the summed area on hover.
fig = px.choropleth_mapbox(
    Final,
    geojson=gpd.GeoSeries(Final["geometry_large"]),
    locations=Final.index,
    color="value",
    hover_data=["area"],
)

# Scatter layer for the points; its markers are moved onto a second colour
# axis so that they do not share the choropleth's colour scale.
point_traces = (
    px.scatter_mapbox(
        points,
        lat=points.geometry.y,
        lon=points.geometry.x,
        color="value",
    )
    .update_traces(marker_coloraxis="coloraxis2", marker_size=10)
    .data
)
fig.add_traces(point_traces)

# Map settings: centre on the midpoint of the bounds in b and draw the small
# polygon outlines as a line layer.
mapbox_settings = {
    "style": "carto-positron",
    "center": {"lon": sum(b[[0, 2]]) / 2, "lat": sum(b[[1, 3]]) / 2},
    "zoom": 3,
    "layers": [{"source": small_polygon.__geo_interface__, "type": "line"}],
}
# Second colour axis: the scale runs from blue to blue, i.e. a single colour.
scatter_coloraxis = {
    "colorbar": {"x": -0.1, "title": "scatter"},
    "colorscale": [[0, "blue"], [1, "blue"]],
}
fig.update_layout(
    mapbox=mapbox_settings,
    coloraxis2=scatter_coloraxis,
    coloraxis={"colorscale": [[0, "white"], [1, "green"]]},
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)

# Leave the figure as the final expression so a notebook still renders it.
fig