def test_spatial_join(self):
    cities = geopandas.read_file(geopandas.datasets.get_path('naturalearth_cities'))
    world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
    countries = world[['geometry', 'name']]
    countries = countries.rename(columns={'name': 'country'})
    cities_with_country = geopandas.sjoin(cities, countries, how="inner", op='intersects')
    self.assertTrue(cities_with_country.size > 1)
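    # Portability note (an addition, not part of the original test): geopandas
    # 0.10 renamed the `op` keyword to `predicate`, and geopandas 1.0 removed
    # `op` entirely, so on newer versions the join above reads:
    #     cities_with_country = geopandas.sjoin(
    #         cities, countries, how="inner", predicate="intersects"
    #     )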
left_gpdf = gp.GeoDataFrame({
    'geometry': gp_points,
    'a': np.arange(10, 10 + len(gp_points)),
    'c': np.arange(20, 20 + len(gp_points)),
    'v': np.arange(30, 30 + len(gp_points)),
}).set_index('a')
right_gpdf = gp.GeoDataFrame({
    'geometry': gp_polygons,
    'a': np.arange(10, 10 + len(gp_polygons)),
    'b': np.arange(20, 20 + len(gp_polygons)),
    'v': np.arange(30, 30 + len(gp_polygons)),
}).set_index('b')
# Generate the expected result with geopandas; `how` is a parameter of the
# enclosing (parametrized) test
gp_expected = gp.sjoin(left_gpdf, right_gpdf, how=how)
gp_expected = gp_expected.rename(columns={"v_x": "v_left", "v_y": "v_right"})
if how == "right":
    gp_expected.index.name = right_gpdf.index.name
else:
    gp_expected.index.name = left_gpdf.index.name
# join with spatialpandas
left_spdf = GeoDataFrame(left_gpdf)
right_spdf = GeoDataFrame(right_gpdf)
result = sp.sjoin(
    left_spdf, right_spdf, how=how
).sort_values(['v_left', 'v_right'])
assert isinstance(result, GeoDataFrame)
# Check pandas results
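# A hedged sketch of that check (the original assertion is truncated in this
# excerpt): at minimum, both joins should agree on the number of matched rows.
assert len(result) == len(gp_expected)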
def returnPointGeometryFromXY(polygon_geometry):
    ## Calculate x and y of the centroid
    centroid_x, centroid_y = polygon_geometry.centroid.x, polygon_geometry.centroid.y
    ## Create a shapely Point geometry from the x and y coords
    point_geometry = Point(centroid_x, centroid_y)
    return point_geometry
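## Note (reviewer addition): `polygon_geometry.centroid` already returns a
## shapely Point, so the function above could simply return it directly.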
### Stash the polygon geometry in another column, as we are going to overwrite
### 'geometry' with the centroid geometry
scaled_gdf['polygon_geometry'] = scaled_gdf['geometry']
### We will be joining the region name to zip codes according to the zip code
### centroid. This calls the function above and returns a centroid for every row
scaled_gdf["geometry"] = scaled_gdf['geometry'].apply(returnPointGeometryFromXY)
### Spatially join the region name to the zip codes using the centroid of
### zip codes and region polygons
scaled_gdf = gpd.sjoin(scaled_gdf, finnish_regions_gdf, how='inner', op='intersects')
### Switch the polygon geometry back to the 'geometry' field and drop useless columns
scaled_gdf['geometry'] = scaled_gdf['polygon_geometry']
scaled_gdf.drop(['index_right', 'polygon_geometry'], axis=1, inplace=True)
### Encode the region name with one-hot encoding (= pandas dummy encoding)
### so machine learning models can work with it
encoded_gdf = pd.get_dummies(scaled_gdf['NAMEFIN'])
### Join the scaled gdf and encoded gdf together
scaled_and_encoded_gdf = scaled_gdf.join(encoded_gdf).drop('NAMEFIN', axis=1)
### The resulting dataframe has both Polygon and MultiPolygon geometries.
### This upcasts the Polygons to MultiPolygon format so all rows share the
### same geometry type
scaled_and_encoded_gdf["geometry"] = [
    MultiPolygon([feature]) if isinstance(feature, Polygon) else feature
    for feature in scaled_and_encoded_gdf["geometry"]
]
print("Dataframe size after adding region name: " + str(len(scaled_and_encoded_gdf.index))+ " zip codes with " + str(len(scaled_and_encoded_gdf.columns)) + " columns")
locations_df = gpd.GeoDataFrame(locations_df, geometry=geom)
locations_df = locations_df[['location_id', 'geometry']]
locations_df = locations_df.set_crs('EPSG:4326')  # modern form of the deprecated {'init': 'epsg:4326'} syntax
print(' Creating locations data frame [DONE]')
# Read in the County boundaries
print(' Reading county shapefile', end="\r", flush=True)
counties_df = gpd.read_file('gz_2010_us_050_00_500k.shp')
counties_df = counties_df.to_crs(locations_df.crs)
counties_df['county_fips'] = counties_df['STATE'] + counties_df['COUNTY']
counties_df['state_fips'] = counties_df['STATE']
counties_df = counties_df[['county_fips', 'state_fips', 'geometry']]
print(' Reading county shapefile [DONE]')
print(' Performing spatial join', end="\r", flush=True)
gdf = gpd.sjoin(locations_df, counties_df, op='within')
gdf.reset_index(inplace=True)
gdf = gdf[['location_id', 'state_fips', 'county_fips']]
gdf.to_sql('location', con=db, index=False)
print(' Performing spatial join [DONE]')
locations = gdf.set_index('location_id').to_dict('index')
# Get the list of location ids we need to search for
location_ids = list(gdf.location_id)
try:
    c.execute('SELECT * FROM patent_location LIMIT 1')
except sqlite3.OperationalError:  # assumes a sqlite3 connection; the table doesn't exist yet
    c.execute('CREATE TABLE patent_location (patent_id text, location_id text)')
values_to_insert = list()
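# Hedged sketch of the likely continuation (the original is truncated here);
# `patent_rows` is a hypothetical iterable of (patent_id, location_id) pairs.
for patent_id, location_id in patent_rows:
    values_to_insert.append((patent_id, location_id))
c.executemany('INSERT INTO patent_location VALUES (?, ?)', values_to_insert)
db.commit()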
print("Defining street-based blocks...") if verbose else None
blocks_single = blocks_gdf.dissolve(by="patch")
blocks_single.crs = buildings.crs
blocks_single["geometry"] = blocks_single.buffer(0.1)
print("Defining block ID...") if verbose else None # street based
blocks_single[id_name] = range(len(blocks_single))
print("Generating centroids...") if verbose else None
buildings_c = buildings.copy()
buildings_c["geometry"] = buildings_c.representative_point() # make points
print("Spatial join...") if verbose else None
centroids_tempID = gpd.sjoin(
    buildings_c, blocks_single, how="left", op="intersects"
)
tempID_to_uID = centroids_tempID[[unique_id, id_name]]
print("Attribute join (tesselation)...") if verbose else None
cells_copy = cells_copy.merge(tempID_to_uID, on=unique_id, how="left")
print("Generating blocks...") if verbose else None
blocks = cells_copy.dissolve(by=id_name)
print("Multipart to singlepart...") if verbose else None
blocks = blocks.explode()
blocks.reset_index(inplace=True, drop=True)
blocks["geometry"] = blocks.exterior
# turn aoi into a geodataframe
aoi_gdf = gpd.GeoDataFrame(vec.wkt_to_gdf(aoi).buffer(0.05))
aoi_gdf.columns = ['geometry']
# get the columns of the input dataframe for the return at the end
cols = burst_gdf.columns
# 1) keep only the intersecting footprints (a double-check, since this was
#    already done earlier)
burst_gdf = gpd.sjoin(burst_gdf, aoi_gdf, how='inner', op='intersects')
# if the aoi gdf has an id field, we need to rename the changed id_left field
if 'id_left' in burst_gdf.columns.tolist():
    # rename id_left to id
    burst_gdf.columns = ['id' if x == 'id_left' else x
                         for x in burst_gdf.columns.tolist()]
# remove duplicates
burst_gdf.drop_duplicates(['SceneID', 'Date', 'bid'], inplace=True)
# check if the number of bursts aligns with the number of coverages
if coverages:
    for burst in burst_gdf.bid.unique():
        if len(burst_gdf[burst_gdf.bid == burst]) != coverages:
            print(' INFO. Removing burst {} because of'
                  ' insufficient coverage.'.format(burst))
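            # Hedged sketch of the removal the message announces (the actual
            # drop statement is truncated in this excerpt):
            burst_gdf = burst_gdf[burst_gdf.bid != burst]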
gdf_b = get_stops_gdf(gtfs_b, "b")
gdf_ab = pd.concat([gdf_a, gdf_b])
gdf_ab = gdf_ab.reset_index()
gdf_poly = gdf_ab.copy()
gdf_poly["geometry"] = gdf_poly["geometry"].buffer(distance/2)
gdf_poly["everything"] = 1
gdf_poly = gdf_poly.dissolve(by="everything")
polygons = None
for geoms in gdf_poly["geometry"]:
    # after the dissolve there is a single multi-part geometry; split it into
    # its constituent polygons (on shapely >= 2.0 this needs `geoms.geoms`)
    polygons = [polygon for polygon in geoms]
single_parts = GeoDataFrame(crs=crs_eurefin, geometry=polygons)
single_parts['new_stop_I'] = single_parts.index
gdf_joined = sjoin(gdf_ab, single_parts, how="left", op='within')
single_parts["geometry"] = single_parts.centroid
gdf_joined = gdf_joined.drop(columns='geometry')
centroid_stops = single_parts.merge(gdf_joined, on="new_stop_I")
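# Note (reviewer addition): after this merge each original stop row carries
# the centroid of its buffered cluster as geometry, keyed by new_stop_I.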
"""JOIN BY lat&lon"""
# produce table with items to update and add to the two tables
# add new id row to separete the tables
# merge (outer) a & b dfs to this main df
# Na rows-> rows to be added
# other rows, rows to be updated
def get_stops_to_add_and_update(gtfs, gdf, name):
    gdf = pd.merge(centroid_stops, gdf[['stop_I', 'feed_id', 'desc']], how='left',
                   on=['stop_I', 'feed_id'], suffixes=('', '_x'))
    nanrows = gdf.loc[gdf['desc_x'].isnull()]
    gdf = gdf.loc[gdf['desc_x'].notnull()]
    nanrows = nanrows.loc[~nanrows['new_stop_I'].isin(gdf['new_stop_I'])]
planned = planned.dropna(subset=["latitude", "longitude"])
# Convert the lon/lat values to geo points. Need to add an initial CRS and then
# change it to align with the IPM regions
print("Creating gdf")
planned_gdf = gpd.GeoDataFrame(
    planned.copy(),
    geometry=gpd.points_from_xy(planned.longitude, planned.latitude),
    crs="EPSG:4326",  # modern replacement for the deprecated {"init": ...} dict
)
if planned_gdf.crs != model_regions_gdf.crs:
planned_gdf = planned_gdf.to_crs(model_regions_gdf.crs)
planned_gdf = gpd.sjoin(model_regions_gdf.drop(columns="IPM_Region"), planned_gdf)
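# Note (reviewer addition): with no keyword arguments, gpd.sjoin defaults to
# an inner join on the "intersects" predicate, so only plants that fall
# inside a model region are kept.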
# Add planned additions from the settings file
if settings["additional_planned"]:
i = 0
for record in settings["additional_planned"]:
plant_id, gen_id, model_region = record
plant_record = planned.loc[
(planned["plant_id_eia"] == plant_id)
& (planned["generator_id"] == gen_id),
:,
]
plant_record["model_region"] = model_region
planned_gdf = planned_gdf.append(plant_record, sort=False)
i += 1
logger.info(f"{i} generators were added to the planned list based on settings")
Returns
-------
geopandas.GeoDataFrame
    Set of ground-truth labels contained in the tile, characterized by
    their type (complete, unfinished or foundation) and their geometry
"""
area = get_tile_footprint(
    raster_features, min_x, min_y, tile_width, tile_height
)
bdf = gpd.GeoDataFrame(
    crs=from_epsg(raster_features["srid"]), geometry=[area]
)
reproj_labels = labels.to_crs(epsg=raster_features["srid"])
tile_items = gpd.sjoin(reproj_labels, bdf)
if tile_items.shape[0] == 0:
    return tile_items[["condition", "geometry"]]
tile_items = gpd.overlay(tile_items, bdf)  # default how="intersection": clip labels to the tile
tile_items = tile_items.explode() # Manage MultiPolygons
return tile_items[["condition", "geometry"]]
def prepare_trajectories(data_path, trajectory_id, regions_df):
    df = read_file(data_path)
    df['t'] = pd.to_datetime(df['t'])
    df = df.set_index('t')
    pts_with_region = sjoin(df, regions_df, how="inner", op='intersects')
    trajectory_list = []
    for traj_id, rows in pts_with_region.groupby([trajectory_id]):
        trajectory = Trajectory(traj_id, rows)
        trajectory.add_direction()
        trajectory.add_speed()
        trajectory_list.append(trajectory)
    return trajectory_list
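# Hypothetical usage sketch (the path, id column, and regions GeoDataFrame
# are illustrative, not from the original source):
trajectories = prepare_trajectories('points.shp', 'traj_id', regions_gdf)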