Kaggle Course – geospatial analysis summary

Source: https://www.kaggle.com/learn/geospatial-analysis

This is a code summary note for the Kaggle geospatial analysis course.

A geopandas GeoDataFrame has a geometry column that holds a point, line, or polygon for each row.

Part I – Your first map

Source: https://www.kaggle.com/alexisbcook/your-first-map

import geopandas as gpd
import pandas as pd
from learntools.core import binder
binder.bind(globals())
from learntools.geospatial.ex1 import *

### LOAD DATA ###

# Shapefile: read_file returns a GeoDataFrame.
# NOTE(review): `loans_sahpe_filepath` is not defined in this file and looks
# like a typo for "shape" - confirm the name against wherever it is defined.
shapef = gpd.read_file(loans_sahpe_filepath)
# A shapefile comes with a CRS (coordinate reference system); print the CRS
# of the frame that was just loaded.
print(shapef.crs)

# KML data: register the KML driver with fiona for read/write before reading.
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
kmlf = gpd.read_file("mykmldata.kml", driver='KML')


# Dataset bundled with geopandas.
# NOTE(review): gpd.datasets was deprecated in geopandas 0.14 and removed in
# 1.0 - confirm the installed version still provides it.
world_filepath = gpd.datasets.get_path('naturalearth_lowres')
gpdf = gpd.read_file(world_filepath)
# The plotting snippets further down refer to this frame as `world`, so bind
# that name as well (same object, no copy).
world = gpdf


### MAPPING
# Plot the base map; keep the returned Axes so more layers can be drawn on it.
ax = world.plot(figsize=(20,20), color='whitesmoke', linestyle=':', edgecolor='black')

# Add your data on top of the base map by passing the same Axes.
shapef.plot(color='black', markersize=2, ax=ax)

# Subsetting data: build the boolean mask from the same frame that is being
# indexed (shapef), then .copy() so the subset is independent of the original.
subshapef = shapef.loc[shapef.country == 'Philippines'].copy()

Part II – Coordinate Reference Systems

Source: https://www.kaggle.com/alexisbcook/coordinate-reference-systems

# A shapefile comes with its CRS already set.
shapef = gpd.read_file(loans_sahpe_filepath)
print(shapef.crs)

# A CSV file with lon/lat columns carries no CRS, so it must be set by hand.
csvf = pd.read_csv("mycsvfile.csv")

# Convert the DataFrame to a GeoDataFrame, building one point per row from
# the lon (x) and lat (y) columns.
gdf = gpd.GeoDataFrame(csvf, geometry=gpd.points_from_xy(csvf.lon, csvf.lat))
# Manually set the CRS to EPSG 4326. The authority string is used instead of
# the {'init': 'epsg:4326'} dict, which is deprecated in pyproj/geopandas.
gdf.crs = "EPSG:4326"


# Reprojecting to a different CRS.
# Draw the base map first and keep its Axes for the overlay.
ax = world.plot(color='whitesmoke', edgecolor='black', linestyle=':', figsize=(8, 8))
# to_crs returns a reprojected frame; gdf itself is left unchanged.
gdf_epsg32630 = gdf.to_crs(epsg=32630)
gdf_epsg32630.plot(ax=ax, markersize=1)
# NOTE(review): `world` is plotted in whatever CRS it was loaded with; the two
# layers only line up if that CRS matches the overlay's - confirm.

# If the EPSG code is not available in geopandas, pass the equivalent PROJ
# string instead. This one reprojects to EPSG 4326.
wgs84_proj = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
gdf.to_crs(wgs84_proj).head()

# Accessing geometry attributes.
# x- and y-coordinates of the first few points.
first_points = gdf.geometry.head()
first_points.x
first_points.y

# Area of every geometry, in squared units of the frame's CRS.
gdf.geometry.area

# Store the area in a new "AREA" column, divided by 10**6.
# NOTE(review): this gives square kilometres only if the CRS units are
# metres (m^2 -> km^2) - confirm the frame is in a projected CRS.
gdf.loc[:, "AREA"] = gdf.geometry.area / 10**6


# Total over all rows of AREA.
total_area = gdf.AREA.sum()
print("Area of entire country in gdf: {} square kilometers".format(total_area))
print("CRS:", gdf.crs)
gdf.head()