Source code for sedona.spark.maps.SedonaMapUtils

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json

from sedona.spark.sql.types import GeometryType
from sedona.spark.geoarrow.geoarrow import dataframe_to_arrow


class SedonaMapUtils:
    """
    Helper routines for preparing Sedona DataFrames and their geometries for map rendering.

    All helpers are classmethods; the class holds no state.
    """

    @classmethod
    def __convert_to_gdf_or_pdf__(cls, df, rename=True, geometry_col=None):
        """
        Converts a SedonaDataFrame to a GeoPandasDataFrame and also renames geometry column to a standard name of 'geometry'
        However, if no geometry column is found even after traversing schema, returns a Pandas Dataframe
        :param df: SedonaDataFrame to convert
        :param rename: [Optional] when exactly ``True`` (the default), the geometry column is renamed to 'geometry'
        :param geometry_col: [Optional] name of the geometry column; looked up from the schema when omitted
        :return: GeoPandas Dataframe or Pandas Dataframe
        :raises ImportError: if geopandas (or its dependency packaging) cannot be imported
        """
        if geometry_col is None:
            geometry_col = cls.__get_geometry_col__(df)

        # Convert the dataframe to arrow format, then to geopandas dataframe
        # This is faster than converting directly to geopandas dataframe via toPandas
        if geometry_col is None:
            # No geometry column found even after searching schema, return Pandas Dataframe
            return dataframe_to_arrow(df).to_pandas()

        try:
            import geopandas as gpd

            # packaging is a dependency of geopandas
            from packaging.version import parse
        except ImportError:
            msg = "GeoPandas is missing. You can install it manually or via apache-sedona[kepler-map] or apache-sedona[pydeck-map]."
            raise ImportError(msg) from None

        # From GeoPandas 1.0.0 onwards, the from_arrow method is available
        if parse(gpd.__version__) >= parse("1.0.0"):
            data_pyarrow = dataframe_to_arrow(df)
            # Pass the geometry column explicitly so both branches agree on the
            # active geometry; otherwise from_arrow picks the first geometry
            # column it finds and a caller-supplied geometry_col is ignored.
            geo_df = gpd.GeoDataFrame.from_arrow(data_pyarrow, geometry=geometry_col)
        else:
            geo_df = gpd.GeoDataFrame(df.toPandas(), geometry=geometry_col)

        # Identity check kept on purpose: only a literal True triggers the rename.
        if geometry_col != "geometry" and rename is True:
            geo_df.rename_geometry("geometry", inplace=True)
        return geo_df

    @classmethod
    def __convert_to_geojson__(cls, df):
        """
        Converts a SedonaDataFrame to GeoJSON
        NOTE: when df has no geometry column the intermediate frame is a plain Pandas Dataframe,
        so the parsed result is whatever its ``to_json`` emits rather than a GeoJSON document.
        :param df: SedonaDataFrame to convert
        :return: GeoJSON object (the JSON text parsed with ``json.loads``)
        """
        gdf = cls.__convert_to_gdf_or_pdf__(df)
        return json.loads(gdf.to_json())

    @classmethod
    def __get_geometry_col__(cls, df):
        """
        Finds the first top-level column of the dataframe whose type is GeometryType
        :param df: dataframe whose ``schema.fields`` are inspected
        :return: name of the first geometry column, or None if there is none
        """
        for field in df.schema.fields:
            if field.dataType == GeometryType():
                return field.name
        return None

    @classmethod
    def __extract_coordinate__(cls, geom, type_list):
        """
        Returns the first coordinate of a geometry and records its geometry type
        Multi-part geometries and collections are reduced to their first simple member before inspection
        :param geom: geometry exposing ``geom_type`` and ``coords`` / ``exterior`` / ``geoms``
        :param type_list: list mutated in place; the simple geometry type is appended if not already present
        :return: first coordinate of the exterior ring for a Polygon, otherwise the first coordinate of the geometry
        """
        if cls.__is_geom_collection__(geom.geom_type):
            geom = cls._extract_first_sub_geometry_(geom)
        geom_type = geom.geom_type

        if geom_type not in type_list:
            type_list.append(geom_type)

        if geom_type == "Polygon":
            return geom.exterior.coords[0]
        return geom.coords[0]

    @classmethod
    def __extract_point_coordinate__(cls, geom):
        """
        Returns the coordinate of a Point geometry
        :param geom: geometry exposing ``geom_type`` and ``coords``
        :return: first coordinate if the geometry is a Point, otherwise None
        """
        if geom.geom_type == "Point":
            return geom.coords[0]
        return None

    @classmethod
    def _extract_first_sub_geometry_(cls, geom):
        """
        Descends through nested multi-part geometries / collections, always taking the first member
        :param geom: geometry exposing ``geom_type`` and, for collections, ``geoms``
        :return: the first geometry reached that is not a collection type (geom itself if it is already simple)
        """
        while cls.__is_geom_collection__(geom.geom_type):
            geom = geom.geoms[0]
        return geom

    @classmethod
    def __is_geom_collection__(cls, geom_type):
        """
        Tells whether a geometry type name denotes a multi-part geometry or a geometry collection
        :param geom_type: geometry type name, e.g. "Point" or "MultiPolygon"
        :return: True for MultiPolygon, MultiLineString, MultiPoint and GeometryCollection, False otherwise
        """
        return geom_type in (
            "MultiPolygon",
            "MultiLineString",
            "MultiPoint",
            "GeometryCollection",
        )