How to use the geopandas.sjoin function in geopandas

To help you get started, we’ve selected a few geopandas.sjoin examples, based on popular ways it is used in public projects.
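At its core, geopandas.sjoin joins two GeoDataFrames on a spatial relationship between their geometries: each row of the left frame is matched against rows of the right frame whose geometry satisfies the chosen predicate. A minimal sketch (the file names are purely illustrative):

import geopandas as gpd

points = gpd.read_file("points.gpkg")      # hypothetical point layer
polygons = gpd.read_file("polygons.gpkg")  # hypothetical polygon layer

# Keep every point that falls inside a polygon, with that polygon's
# attribute columns attached. predicate= is the geopandas >= 0.10 spelling
# of the older op= keyword that most of the examples below use.
joined = gpd.sjoin(points, polygons, how="inner", predicate="within")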


github Kaggle / docker-python / tests / test_geopandas.py
def test_spatial_join(self):
        cities = geopandas.read_file(geopandas.datasets.get_path('naturalearth_cities'))
        world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
        countries = world[['geometry', 'name']]
        countries = countries.rename(columns={'name':'country'})
        cities_with_country = geopandas.sjoin(cities, countries, how="inner", op='intersects')
        self.assertTrue(cities_with_country.size > 1)
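A note if you run this against a current install: the op keyword was renamed to predicate in geopandas 0.10 and removed in 1.0, and the bundled geopandas.datasets module used here is gone in 1.0 as well. On recent releases the join itself becomes:

cities_with_country = geopandas.sjoin(cities, countries, how="inner", predicate="intersects")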
github holoviz / spatialpandas / tests / tools / test_sjoin.py
    left_gpdf = gp.GeoDataFrame({
        'geometry': gp_points,
        'a': np.arange(10, 10 + len(gp_points)),
        'c': np.arange(20, 20 + len(gp_points)),
        'v': np.arange(30, 30 + len(gp_points)),
    }).set_index('a')

    right_gpdf = gp.GeoDataFrame({
        'geometry': gp_polygons,
        'a': np.arange(10, 10 + len(gp_polygons)),
        'b': np.arange(20, 20 + len(gp_polygons)),
        'v': np.arange(30, 30 + len(gp_polygons)),
    }).set_index('b')

    # Generate expected result as geopandas GeoDataFrame
    gp_expected = gp.sjoin(left_gpdf, right_gpdf, how=how)
    gp_expected = gp_expected.rename(columns={"v_x": "v_left", "v_y": "v_right"})
    if how == "right":
        gp_expected.index.name = right_gpdf.index.name
    else:
        gp_expected.index.name = left_gpdf.index.name

    # join with spatialpandas
    left_spdf = GeoDataFrame(left_gpdf)
    right_spdf = GeoDataFrame(right_gpdf)

    result = sp.sjoin(
        left_spdf, right_spdf, how=how
    ).sort_values(['v_left', 'v_right'])
    assert isinstance(result, GeoDataFrame)

    # Check pandas results
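The index bookkeeping this test asserts is standard sjoin behaviour: how="left" and how="inner" keep the left frame's index, how="right" keeps the right frame's, and the other side's index comes back as an index_right (or index_left) column. A toy illustration, not taken from the project:

from shapely.geometry import Point, box
import geopandas as gpd

pts = gpd.GeoDataFrame(geometry=[Point(0.5, 0.5), Point(2, 2)], index=["p1", "p2"])
polys = gpd.GeoDataFrame(geometry=[box(0, 0, 1, 1)], index=["square"])

inner = gpd.sjoin(pts, polys, how="inner", predicate="within")
print(inner.index.tolist())           # ['p1'] -- the left index survives
print(inner["index_right"].tolist())  # ['square'] -- the right index becomes a column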
github csc-training / geocomputing / machineLearning / 01_data_preparation / 01_vectorDataPreparations.py
    def returnPointGeometryFromXY(polygon_geometry):
        ## Calculate x and y of the centroid
        centroid_x,centroid_y = polygon_geometry.centroid.x,polygon_geometry.centroid.y
        ## Create a shapely Point geometry of the x and y coords
        point_geometry = Point(centroid_x,centroid_y)
        return point_geometry

    ### Stash the polygon geometry in another column, as we are going to overwrite 'geometry' with the centroid geometry
    scaled_gdf['polygon_geometry'] = scaled_gdf['geometry']

    ### We will be joining the region name to zip codes according to the zip code centroid. 
    ### This applies the function above and returns a centroid for every row
    scaled_gdf["geometry"] = scaled_gdf['geometry'].apply(returnPointGeometryFromXY)

    ### Spatially join the region name to the zip codes using the centroid of zip codes and region polygons
    scaled_gdf = gpd.sjoin(scaled_gdf,finnish_regions_gdf,how='inner',op='intersects')

    ### Switch the polygon geometry back to the 'geometry' field and drop useless columns
    scaled_gdf['geometry'] = scaled_gdf['polygon_geometry']
    scaled_gdf.drop(['index_right','polygon_geometry'],axis=1, inplace=True)

    ### Encode the region name with one-hot encoding (= pandas dummy encoding)
    ### so machine learning models can make better use of it
    encoded_gdf = pd.get_dummies(scaled_gdf['NAMEFIN'])

    ### Join scaled gdf and encoded gdf together
    scaled_and_encoded_gdf = scaled_gdf.join(encoded_gdf).drop('NAMEFIN',axis=1)

    ### The resulting dataframe has Polygon and Multipolygon geometries. 
    ### This upcasts the polygons to multipolygon format so all of them have the same geometry type
    scaled_and_encoded_gdf["geometry"] = [MultiPolygon([feature]) if type(feature) == Polygon else feature for feature in scaled_and_encoded_gdf["geometry"]]
    print("Dataframe size after adding region name: " + str(len(scaled_and_encoded_gdf.index))+ " zip codes with " + str(len(scaled_and_encoded_gdf.columns)) + " columns")
github mikeasilva / cgr-work / general-python-scripts / Update US_PatentData_USPTO.py
    locations_df = gpd.GeoDataFrame(locations_df, geometry=geom)
    locations_df = locations_df[['location_id', 'geometry']]
    locations_df.crs = {'init': 'epsg:4326'}
    print(' Creating locations data frame [DONE]')
    
    # Read in the County boundaries
    print(' Reading county shapefile', end="\r", flush=True)
    counties_df = gpd.read_file('gz_2010_us_050_00_500k.shp')
    counties_df = counties_df.to_crs(locations_df.crs)
    counties_df['county_fips'] = counties_df['STATE'] + counties_df['COUNTY']
    counties_df['state_fips'] = counties_df['STATE']
    counties_df = counties_df[['county_fips', 'state_fips', 'geometry']]
    print(' Reading county shapefile [DONE]')

    print(' Performing spatial join', end="\r", flush=True)
    gdf = gpd.sjoin(locations_df, counties_df, op='within')
    gdf.reset_index(inplace=True)
    gdf = gdf[['location_id', 'state_fips', 'county_fips']]
    gdf.to_sql('location', con=db, index=False)
    print(' Performing spatial join [DONE]')

locations = gdf.set_index('location_id').to_dict('index')

# Get the list of location ids we need to search for
location_ids = list(gdf.location_id)

try:
    c.execute('SELECT * FROM patent_location LIMIT 1')
except:
    c.execute('CREATE TABLE patent_location (patent_id text, location_id text)')
    values_to_insert = list()
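One detail worth copying from this script: both frames are brought onto the same CRS (the to_crs call) before the join. sjoin never reprojects for you; with mismatched CRS the predicate would be evaluated on incomparable coordinates, and recent geopandas versions warn or raise about it. The generic guard looks like:

# Generic pre-join guard, mirroring the snippet above.
if counties_df.crs != locations_df.crs:
    counties_df = counties_df.to_crs(locations_df.crs)
gdf = gpd.sjoin(locations_df, counties_df, predicate="within")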
github martinfleis / momepy / momepy / elements.py
print("Defining street-based blocks...") if verbose else None
        blocks_single = blocks_gdf.dissolve(by="patch")
        blocks_single.crs = buildings.crs

        blocks_single["geometry"] = blocks_single.buffer(0.1)

        print("Defining block ID...") if verbose else None  # street based
        blocks_single[id_name] = range(len(blocks_single))

        print("Generating centroids...") if verbose else None
        buildings_c = buildings.copy()
        buildings_c["geometry"] = buildings_c.representative_point()  # make points

        print("Spatial join...") if verbose else None
        centroids_tempID = gpd.sjoin(
            buildings_c, blocks_single, how="left", op="intersects"
        )

        tempID_to_uID = centroids_tempID[[unique_id, id_name]]

        print("Attribute join (tesselation)...") if verbose else None
        cells_copy = cells_copy.merge(tempID_to_uID, on=unique_id, how="left")

        print("Generating blocks...") if verbose else None
        blocks = cells_copy.dissolve(by=id_name)

        print("Multipart to singlepart...") if verbose else None
        blocks = blocks.explode()
        blocks.reset_index(inplace=True, drop=True)

        blocks["geometry"] = blocks.exterior
github ESA-PhiLab / OpenSarToolkit / ost / s1 / burst.py
    Args:

    Returns:


    '''

    # turn aoi into a geodataframe
    aoi_gdf = gpd.GeoDataFrame(vec.wkt_to_gdf(aoi).buffer(0.05))
    aoi_gdf.columns = ['geometry']
    # get columns of input dataframe for later return function
    cols = burst_gdf.columns

    # 1) keep only intersecting footprints (a duplicate check, since this was already done earlier)
    burst_gdf = gpd.sjoin(burst_gdf, aoi_gdf, how='inner', op='intersects')

    # if the aoi gdf has an id field, we need to rename the changed id_left field
    if 'id_left' in burst_gdf.columns.tolist():
        # rename id_left to id
        burst_gdf.columns = (['id' if x == 'id_left' else x
                              for x in burst_gdf.columns.tolist()])
    
    # remove duplicates
    burst_gdf.drop_duplicates(['SceneID', 'Date', 'bid'], inplace=True)
    
    # check if number of bursts align with number of coverages
    if coverages:
        for burst in burst_gdf.bid.unique():
            if len(burst_gdf[burst_gdf.bid == burst]) != coverages:
                print(' INFO. Removing burst {} because of'
                      ' insufficient coverage.'.format(burst))
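The id_left rename above is needed because sjoin suffixes any non-geometry column that exists in both frames, with lsuffix="left" and rsuffix="right" by default. Passing your own suffixes up front avoids the cleanup, e.g. (predicate= shown for current geopandas):

burst_gdf = gpd.sjoin(
    burst_gdf, aoi_gdf, how='inner', predicate='intersects',
    lsuffix='burst', rsuffix='aoi',
)  # a shared 'id' column would come out as 'id_burst' / 'id_aoi'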
github CxAalto / gtfspy / gtfspy / aggregate_stops.py
    gdf_b = get_stops_gdf(gtfs_b, "b")
    gdf_ab = pd.concat([gdf_a, gdf_b])
    gdf_ab = gdf_ab.reset_index()
    gdf_poly = gdf_ab.copy()
    gdf_poly["geometry"] = gdf_poly["geometry"].buffer(distance/2)
    gdf_poly["everything"] = 1

    gdf_poly = gdf_poly.dissolve(by="everything")

    polygons = None
    for geoms in gdf_poly["geometry"]:
        polygons = [polygon for polygon in geoms]

    single_parts = GeoDataFrame(crs=crs_eurefin, geometry=polygons)
    single_parts['new_stop_I'] = single_parts.index
    gdf_joined = sjoin(gdf_ab, single_parts, how="left", op='within')
    single_parts["geometry"] = single_parts.centroid
    gdf_joined = gdf_joined.drop('geometry', 1)
    centroid_stops = single_parts.merge(gdf_joined, on="new_stop_I")
    """JOIN BY lat&lon"""
    # produce table with items to update and add to the two tables
    # add a new id row to separate the tables
    # merge (outer) a & b dfs to this main df
    # Na rows-> rows to be added
    # other rows, rows to be updated
    def get_stops_to_add_and_update(gtfs, gdf, name):
        gdf = pd.merge(centroid_stops, gdf[['stop_I', 'feed_id', 'desc']], how='left', on=['stop_I', 'feed_id'],
                         suffixes=('', '_x'))

        nanrows = gdf.loc[gdf['desc_x'].isnull()]
        gdf = gdf.loc[gdf['desc_x'].notnull()]
        nanrows = nanrows.loc[~nanrows['new_stop_I'].isin(gdf['new_stop_I'])]
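An aside on the part-splitting loop earlier in this snippet: iterating gdf_poly["geometry"] and rebuilding the polygons list keeps only the parts of the last multipolygon, which is safe here only because the dissolve produced a single row. Current geopandas can do the same split directly, a sketch:

# Equivalent split of the dissolved geometry into single-part polygons.
single_parts = gdf_poly.explode(index_parts=False).reset_index(drop=True)[["geometry"]]
single_parts["new_stop_I"] = single_parts.index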
github gschivley / PowerGenome / powergenome / generators.py
    planned = planned.dropna(subset=["latitude", "longitude"])

    # Convert the lon/lat values to geo points. Need to add an initial CRS and then
    # change it to align with the IPM regions
    print("Creating gdf")
    planned_gdf = gpd.GeoDataFrame(
        planned.copy(),
        geometry=gpd.points_from_xy(planned.longitude.copy(), planned.latitude.copy()),
        crs={"init": "epsg:4326"},
    )
    # planned_gdf.crs = {"init": "epsg:4326"}
    if planned_gdf.crs != model_regions_gdf.crs:
        planned_gdf = planned_gdf.to_crs(model_regions_gdf.crs)

    planned_gdf = gpd.sjoin(model_regions_gdf.drop(columns="IPM_Region"), planned_gdf)

    # Add planned additions from the settings file
    if settings["additional_planned"]:
        i = 0
        for record in settings["additional_planned"]:
            plant_id, gen_id, model_region = record
            plant_record = planned.loc[
                (planned["plant_id_eia"] == plant_id)
                & (planned["generator_id"] == gen_id),
                :,
            ]
            plant_record["model_region"] = model_region

            planned_gdf = planned_gdf.append(plant_record, sort=False)
            i += 1
        logger.info(f"{i} generators were added to the planned list based on settings")
github Oslandia / deeposlandia / deeposlandia / geometries.py
    Returns
    -------
    geopandas.GeoDataFrame
        Set of ground-truth labels contained in the tile, characterized by
        their type (complete, unfinished or foundation) and their geometry

    """
    area = get_tile_footprint(
        raster_features, min_x, min_y, tile_width, tile_height
    )
    bdf = gpd.GeoDataFrame(
        crs=from_epsg(raster_features["srid"]), geometry=[area]
    )
    reproj_labels = labels.to_crs(epsg=raster_features["srid"])
    tile_items = gpd.sjoin(reproj_labels, bdf)
    if tile_items.shape[0] == 0:
        return tile_items[["condition", "geometry"]]
    tile_items = gpd.overlay(tile_items, bdf)
    tile_items = tile_items.explode()  # Manage MultiPolygons
    return tile_items[["condition", "geometry"]]
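Note the division of labour here: sjoin only pairs rows and never alters geometry, so clipping the labels to the tile is left to gpd.overlay (an intersection overlay by default). On recent geopandas, the pairing and clipping can also be collapsed into geopandas.clip, roughly:

# Roughly equivalent: clip the labels to the tile footprint in one call.
tile_items = gpd.clip(reproj_labels, bdf)
tile_items = tile_items.explode(index_parts=False)  # manage MultiPolygons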
github anitagraser / movingpandas / sandbox / compute_regional_entropy.py
def prepare_trajectories(data_path, trajectory_id, regions_df):
    df = read_file(data_path)
    df['t'] = pd.to_datetime(df['t'])
    df = df.set_index('t')
    pts_with_region = sjoin(df, regions_df, how="inner", op='intersects')
    trajectory_list = []
    for traj_id, rows in pts_with_region.groupby([trajectory_id]):
        trajectory = Trajectory(traj_id, rows)
        trajectory.add_direction()
        trajectory.add_speed()
        trajectory_list.append(trajectory)
    return trajectory_list