# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Base class that lets a Sedona/Spark DataFrame or Column behave like a geopandas GeoDataFrame or GeoSeries.
"""
from abc import ABCMeta, abstractmethod
from typing import (
Any,
Callable,
Optional,
Union,
)
from shapely.geometry.base import BaseGeometry
import geopandas as gpd
import pandas as pd
import pyspark.pandas as ps
from pyspark.pandas._typing import (
Axis,
Dtype,
Scalar,
)
from pyspark.sql import Column
from sedona.spark.geopandas._typing import GeoFrameLike
# Module-level alias for the builtin ``bool``.
# NOTE(review): presumably kept so the builtin stays reachable where the name
# ``bool`` is shadowed elsewhere in this module — confirm against its usages.
bool_type = bool
[docs]
class GeoFrame(metaclass=ABCMeta):
"""
A base class for both GeoDataFrame and GeoSeries.
"""
@property
def sindex(self) -> "SpatialIndex":
    """
    Spatial index for the geometries of the GeoSeries.

    The spatial index may not be fully initialized until its first use.

    Currently, sindex is not retained when it is accessed from a
    GeoDataFrame. As a workaround, first extract the active geometry
    column as a GeoSeries and access ``sindex`` on that object.

    Returns
    -------
    SpatialIndex
        The spatial index.

    Examples
    --------
    >>> from shapely.geometry import Point, box
    >>> from sedona.spark.geopandas import GeoSeries
    >>>
    >>> s = GeoSeries([Point(x, x) for x in range(5)])
    >>> s.sindex.query(box(1, 1, 3, 3))
    [Point(1, 1), Point(2, 2), Point(3, 3)]
    >>> s.has_sindex
    True
    """
    # Hand the attribute lookup off to the shared delegation helper.
    spatial_index = _delegate_to_geometry_column("sindex", self)
    return spatial_index
@property
def has_sindex(self):
    """Report whether a spatial index exists, without generating one.

    Access the ``.sindex`` attribute of a GeoDataFrame or GeoSeries to
    generate the spatial index when it does not exist yet; depending on
    the underlying index implementation this may take considerable time.

    Note that the underlying spatial index may not be fully initialized
    until the first use.

    Currently, sindex is not retained when this is accessed from a
    GeoDataFrame. As a workaround, first extract the active geometry
    column as a GeoSeries and access ``has_sindex`` on that object.

    Returns
    -------
    bool
        ``True`` if the spatial index has been generated, ``False`` if not.

    Examples
    --------
    >>> from shapely.geometry import Point
    >>> from sedona.spark.geopandas import GeoSeries
    >>> s = GeoSeries([Point(x, x) for x in range(5)])
    >>> s.has_sindex
    False
    >>> index = s.sindex
    >>> s.has_sindex
    True
    """
    index_exists = _delegate_to_geometry_column("has_sindex", self)
    return index_exists
[docs]
@abstractmethod
def copy(self: GeoFrameLike) -> GeoFrameLike:
    """Abstract copy hook; this base version always raises.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    message = "This method is not implemented yet."
    raise NotImplementedError(message)
@property
def area(self) -> ps.Series:
    """
    Area of each geometry in the GeoSeries, in the units of the CRS.

    Returns
    -------
    Series
        A Series containing the area of each geometry.

    Examples
    --------
    >>> from shapely.geometry import Polygon
    >>> from sedona.spark.geopandas import GeoSeries
    >>> gs = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
    ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
    ...     ]
    ... )
    >>> gs.area
    0 1.0
    1 4.0
    dtype: float64
    """
    areas = _delegate_to_geometry_column("area", self)
    return areas
@property
def geom_type(self):
    """
    Geometry type of each geometry, as a Series of strings.

    Note: Unlike Geopandas, Sedona returns LineString instead of
    LinearRing.

    Returns
    -------
    Series
        A Series containing the geometry type of each geometry.

    Examples
    --------
    >>> from shapely.geometry import Polygon, Point
    >>> from sedona.spark.geopandas import GeoSeries
    >>> gs = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
    ...         Point(0, 0),
    ...     ]
    ... )
    >>> gs.geom_type
    0 POLYGON
    1 POINT
    dtype: object
    """
    type_names = _delegate_to_geometry_column("geom_type", self)
    return type_names
@property
@abstractmethod
def type(self):
    """Abstract property; this base version always raises.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    message = "This method is not implemented yet."
    raise NotImplementedError(message)
@property
def length(self):
    """
    Length of each geometry in the GeoSeries.

    For a (Multi)Polygon this is the length of its exterior (i.e. the
    perimeter). For a GeometryCollection it sums the values of each of
    the individual geometries.

    Returns
    -------
    Series
        A Series containing the length of each geometry.

    Examples
    --------
    >>> from shapely.geometry import (
    ...     GeometryCollection,
    ...     LineString,
    ...     Point,
    ...     Polygon,
    ... )
    >>> from sedona.spark.geopandas import GeoSeries
    >>> gs = GeoSeries(
    ...     [
    ...         Point(0, 0),
    ...         LineString([(0, 0), (1, 1)]),
    ...         Polygon([(0, 0), (1, 0), (1, 1)]),
    ...         GeometryCollection(
    ...             [
    ...                 Point(0, 0),
    ...                 LineString([(0, 0), (1, 1)]),
    ...                 Polygon([(0, 0), (1, 0), (1, 1)]),
    ...             ]
    ...         ),
    ...     ]
    ... )
    >>> gs.length
    0 0.000000
    1 1.414214
    2 3.414214
    3 4.828427
    dtype: float64
    """
    lengths = _delegate_to_geometry_column("length", self)
    return lengths
@property
def is_valid(self):
    """Flag valid geometries.

    Returns a ``Series`` of ``dtype('bool')`` whose value is ``True`` for
    geometries that are valid.

    Examples
    --------
    An example with one invalid polygon (a bowtie geometry crossing itself)
    and one missing geometry:

    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         Polygon([(0,0), (1, 1), (1, 0), (0, 1)]),  # bowtie geometry
    ...         Polygon([(0, 0), (2, 2), (2, 0)]),
    ...         None
    ...     ]
    ... )
    >>> s
    0 POLYGON ((0 0, 1 1, 0 1, 0 0))
    1 POLYGON ((0 0, 1 1, 1 0, 0 1, 0 0))
    2 POLYGON ((0 0, 2 2, 2 0, 0 0))
    3 None
    dtype: geometry

    >>> s.is_valid
    0 True
    1 False
    2 True
    3 False
    dtype: bool

    See also
    --------
    GeoSeries.is_valid_reason : reason for invalidity
    """
    validity = _delegate_to_geometry_column("is_valid", self)
    return validity
[docs]
def is_valid_reason(self):
    """Explain why each geometry is (in)valid.

    Returns a ``Series`` of strings with the reason for invalidity of
    each geometry.

    Examples
    --------
    An example with invalid polygons (such as a bowtie geometry crossing
    itself) and one missing geometry:

    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         Polygon([(0,0), (1, 1), (1, 0), (0, 1)]),  # bowtie geometry
    ...         Polygon([(0, 0), (2, 2), (2, 0)]),
    ...         Polygon([(0, 0), (2, 0), (1, 1), (2, 2), (0, 2), (1, 1), (0, 0)]),
    ...         None
    ...     ]
    ... )
    >>> s
    0 POLYGON ((0 0, 1 1, 0 1, 0 0))
    1 POLYGON ((0 0, 1 1, 1 0, 0 1, 0 0))
    2 POLYGON ((0 0, 2 2, 2 0, 0 0))
    3 POLYGON ((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))
    4 None
    dtype: geometry

    >>> s.is_valid_reason()
    0 Valid Geometry
    1 Self-intersection at or near point (0.5, 0.5, NaN)
    2 Valid Geometry
    3 Ring Self-intersection at or near point (1.0, 1.0)
    4 None
    dtype: object

    See also
    --------
    GeoSeries.is_valid : detect invalid geometries
    GeoSeries.make_valid : fix invalid geometries
    """
    reasons = _delegate_to_geometry_column("is_valid_reason", self)
    return reasons
@property
def is_empty(self):
    """
    Flag empty geometries.

    Returns a ``Series`` of ``dtype('bool')`` whose value is ``True`` for
    empty geometries.

    Examples
    --------
    An example of a GeoSeries with one empty point, one point and one
    missing value:

    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Point
    >>> geoseries = GeoSeries([Point(), Point(2, 1), None], crs="EPSG:4326")
    >>> geoseries
    0 POINT EMPTY
    1 POINT (2 1)
    2 None

    >>> geoseries.is_empty
    0 True
    1 False
    2 False
    dtype: bool

    See Also
    --------
    GeoSeries.isna : detect missing geometries
    """
    emptiness = _delegate_to_geometry_column("is_empty", self)
    return emptiness
# def count_coordinates(self):
# raise NotImplementedError("This method is not implemented yet.")
# def count_geometries(self):
# raise NotImplementedError("This method is not implemented yet.")
# def count_interior_rings(self):
# raise NotImplementedError("This method is not implemented yet.")
@property
def is_simple(self):
    """Flag geometries that do not cross themselves.

    Returns a ``Series`` of ``dtype('bool')`` whose value is ``True`` for
    geometries that do not cross themselves.

    This is meaningful only for `LineStrings` and `LinearRings`.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import LineString
    >>> s = GeoSeries(
    ...     [
    ...         LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
    ...         LineString([(0, 0), (1, 1), (1, -1)]),
    ...     ]
    ... )
    >>> s
    0 LINESTRING (0 0, 1 1, 1 -1, 0 1)
    1 LINESTRING (0 0, 1 1, 1 -1)
    dtype: geometry

    >>> s.is_simple
    0 False
    1 True
    dtype: bool
    """
    simplicity = _delegate_to_geometry_column("is_simple", self)
    return simplicity
@property
def is_ring(self):
    """Flag features that are closed.

    Returns a ``Series`` of ``dtype('bool')`` whose value is ``True`` for
    features that are closed.

    When constructing a LinearRing, the sequence of coordinates may be
    explicitly closed by passing identical values in the first and last
    indices. Otherwise, the sequence will be implicitly closed by copying
    the first tuple to the last index.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import LineString, LinearRing
    >>> s = GeoSeries(
    ...     [
    ...         LineString([(0, 0), (1, 1), (1, -1)]),
    ...         LineString([(0, 0), (1, 1), (1, -1), (0, 0)]),
    ...         LinearRing([(0, 0), (1, 1), (1, -1)]),
    ...     ]
    ... )
    >>> s
    0 LINESTRING (0 0, 1 1, 1 -1)
    1 LINESTRING (0 0, 1 1, 1 -1, 0 0)
    2 LINEARRING (0 0, 1 1, 1 -1, 0 0)
    dtype: geometry

    >>> s.is_ring
    0 False
    1 True
    2 True
    dtype: bool
    """
    closed_flags = _delegate_to_geometry_column("is_ring", self)
    return closed_flags
# @property
# def is_ccw(self):
# raise NotImplementedError("This method is not implemented yet.")
# @property
# def is_closed(self):
# raise NotImplementedError("This method is not implemented yet.")
@property
def has_z(self):
    """Flag features that have a z-component.

    Returns a ``Series`` of ``dtype('bool')`` whose value is ``True`` for
    features that have a z-component.

    Notes
    -----
    Every operation in GeoPandas is planar, i.e. the potential third
    dimension is not taken into account.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Point
    >>> s = GeoSeries(
    ...     [
    ...         Point(0, 1),
    ...         Point(0, 1, 2),
    ...     ]
    ... )
    >>> s
    0 POINT (0 1)
    1 POINT Z (0 1 2)
    dtype: geometry

    >>> s.has_z
    0 False
    1 True
    dtype: bool
    """
    z_flags = _delegate_to_geometry_column("has_z", self)
    return z_flags
# def get_precision(self):
# raise NotImplementedError("This method is not implemented yet.")
[docs]
def get_geometry(self, index):
    """Return the n-th geometry from a collection of geometries (0-indexed).

    A non-negative index selects the geometry at that position. A negative
    index counts backward from the end of the collection (e.g., -1 returns
    the last geometry). ``None`` is returned if the index is out of bounds.

    Parameters
    ----------
    index : int or array_like
        Position of a geometry to be retrieved within its collection.

    Returns
    -------
    GeoSeries

    Notes
    -----
    Simple geometries act as collections of length 1. Any out-of-range
    index value returns None.

    Using Shapely < 2.0 may lead to different results for empty simple
    geometries due to how shapely interprets them.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import (
    ...     GeometryCollection,
    ...     MultiPoint,
    ...     Point,
    ...     Polygon,
    ... )
    >>> s = GeoSeries(
    ...     [
    ...         Point(0, 0),
    ...         MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0)]),
    ...         GeometryCollection(
    ...             [MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0)]), Point(0, 1)]
    ...         ),
    ...         Polygon(),
    ...         GeometryCollection(),
    ...     ]
    ... )
    >>> s
    0 POINT (0 0)
    1 MULTIPOINT ((0 0), (1 1), (0 1), (1 0))
    2 GEOMETRYCOLLECTION (MULTIPOINT ((0 0), (1 1), ...
    3 POLYGON EMPTY
    4 GEOMETRYCOLLECTION EMPTY
    dtype: geometry

    >>> s.get_geometry(0)
    0 POINT (0 0)
    1 POINT (0 0)
    2 MULTIPOINT ((0 0), (1 1), (0 1), (1 0))
    3 POLYGON EMPTY
    4 None
    dtype: geometry

    >>> s.get_geometry(1)
    0 None
    1 POINT (1 1)
    2 POINT (0 1)
    3 None
    4 None
    dtype: geometry

    >>> s.get_geometry(-1)
    0 POINT (0 0)
    1 POINT (1 0)
    2 POINT (0 1)
    3 POLYGON EMPTY
    4 None
    dtype: geometry
    """
    selected = _delegate_to_geometry_column("get_geometry", self, index)
    return selected
@property
def boundary(self):
    """Set-theoretic boundary of each geometry.

    Returns a ``GeoSeries`` of lower dimensional objects representing
    each geometry's set-theoretic `boundary`.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(0, 0), (1, 1), (1, 0)]),
    ...         Point(0, 0),
    ...     ]
    ... )
    >>> s
    0 POLYGON ((0 0, 1 1, 0 1, 0 0))
    1 LINESTRING (0 0, 1 1, 1 0)
    2 POINT (0 0)
    dtype: geometry

    >>> s.boundary
    0 LINESTRING (0 0, 1 1, 0 1, 0 0)
    1 MULTIPOINT ((0 0), (1 0))
    2 GEOMETRYCOLLECTION EMPTY
    dtype: geometry

    See also
    --------
    GeoSeries.exterior : outer boundary (without interior rings)
    """
    boundaries = _delegate_to_geometry_column("boundary", self)
    return boundaries
@property
def centroid(self):
    """Centroid of each geometry.

    Returns a ``GeoSeries`` of points representing the centroid of each
    geometry.

    Note that the centroid does not have to be on or within the original
    geometry.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(0, 0), (1, 1), (1, 0)]),
    ...         Point(0, 0),
    ...     ]
    ... )
    >>> s
    0 POLYGON ((0 0, 1 1, 0 1, 0 0))
    1 LINESTRING (0 0, 1 1, 1 0)
    2 POINT (0 0)
    dtype: geometry

    >>> s.centroid
    0 POINT (0.33333 0.66667)
    1 POINT (0.70711 0.5)
    2 POINT (0 0)
    dtype: geometry

    See also
    --------
    GeoSeries.representative_point : point guaranteed to be within each geometry
    """
    centroids = _delegate_to_geometry_column("centroid", self)
    return centroids
# def concave_hull(self, ratio=0.0, allow_holes=False):
# raise NotImplementedError("This method is not implemented yet.")
# @property
# def convex_hull(self):
# raise NotImplementedError("This method is not implemented yet.")
# def delaunay_triangles(self, tolerance=0.0, only_edges=False):
# raise NotImplementedError("This method is not implemented yet.")
# def voronoi_polygons(self, tolerance=0.0, extend_to=None, only_edges=False):
# raise NotImplementedError("This method is not implemented yet.")
@property
def envelope(self):
    """Envelope (bounding rectangle) of each geometry.

    Returns a ``GeoSeries`` of geometries representing the envelope of
    each geometry.

    The envelope of a geometry is the bounding rectangle. That is, the
    point or smallest rectangular polygon (with sides parallel to the
    coordinate axes) that contains the geometry.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point, MultiPoint
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(0, 0), (1, 1), (1, 0)]),
    ...         MultiPoint([(0, 0), (1, 1)]),
    ...         Point(0, 0),
    ...     ]
    ... )
    >>> s
    0 POLYGON ((0 0, 1 1, 0 1, 0 0))
    1 LINESTRING (0 0, 1 1, 1 0)
    2 MULTIPOINT ((0 0), (1 1))
    3 POINT (0 0)
    dtype: geometry

    >>> s.envelope
    0 POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))
    1 POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))
    2 POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))
    3 POINT (0 0)
    dtype: geometry

    See also
    --------
    GeoSeries.convex_hull : convex hull geometry
    """
    envelopes = _delegate_to_geometry_column("envelope", self)
    return envelopes
# def minimum_rotated_rectangle(self):
# raise NotImplementedError("This method is not implemented yet.")
# @property
# def exterior(self):
# raise NotImplementedError("This method is not implemented yet.")
# def extract_unique_points(self):
# raise NotImplementedError("This method is not implemented yet.")
# def offset_curve(self, distance, quad_segs=8, join_style="round", mitre_limit=5.0):
# raise NotImplementedError("This method is not implemented yet.")
# @property
# def interiors(self):
# raise NotImplementedError("This method is not implemented yet.")
# def remove_repeated_points(self, tolerance=0.0):
# raise NotImplementedError("This method is not implemented yet.")
# def set_precision(self, grid_size, mode="valid_output"):
# raise NotImplementedError("This method is not implemented yet.")
# def representative_point(self):
# raise NotImplementedError("This method is not implemented yet.")
# def minimum_bounding_circle(self):
# raise NotImplementedError("This method is not implemented yet.")
# def minimum_bounding_radius(self):
# raise NotImplementedError("This method is not implemented yet.")
# def minimum_clearance(self):
# raise NotImplementedError("This method is not implemented yet.")
# def normalize(self):
# raise NotImplementedError("This method is not implemented yet.")
[docs]
def make_valid(self, *, method="linework", keep_collapsed=True):
    """Repair invalid geometries.

    Returns a ``GeoSeries`` with valid geometries.

    If the input geometry is already valid, then it will be preserved.
    In many cases, in order to create a valid geometry, the input
    geometry must be split into multiple parts or multiple geometries.
    If the geometry must be split into multiple parts of the same type
    to be made valid, then a multi-part geometry will be returned
    (e.g. a MultiPolygon).
    If the geometry must be split into multiple parts of different types
    to be made valid, then a GeometryCollection will be returned.

    In Sedona, only the 'structure' method is available:

    * the 'structure' algorithm tries to reason from the structure of the
      input to find the 'correct' repair: exterior rings bound area,
      interior holes exclude area. It first makes all rings valid, then
      shells are merged and holes are subtracted from the shells to
      generate valid result. It assumes that holes and shells are correctly
      categorized in the input geometry.

    Parameters
    ----------
    method : {'linework', 'structure'}, default 'linework'
        Algorithm to use when repairing geometry. Sedona Geopandas only
        supports the 'structure' method. The default method is "linework"
        to match compatibility with Geopandas, but it must be explicitly
        set to 'structure' to use the Sedona implementation.
    keep_collapsed : bool, default True
        For the 'structure' method, True will keep components that have
        collapsed into a lower dimensionality. For example, a ring
        collapsing to a line, or a line collapsing to a point.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import MultiPolygon, Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (0, 2), (1, 1), (2, 2), (2, 0), (1, 1), (0, 0)]),
    ...         Polygon([(0, 2), (0, 1), (2, 0), (0, 0), (0, 2)]),
    ...         LineString([(0, 0), (1, 1), (1, 0)]),
    ...     ],
    ... )
    >>> s
    0 POLYGON ((0 0, 0 2, 1 1, 2 2, 2 0, 1 1, 0 0))
    1 POLYGON ((0 2, 0 1, 2 0, 0 0, 0 2))
    2 LINESTRING (0 0, 1 1, 1 0)
    dtype: geometry

    >>> s.make_valid(method="structure")
    0 MULTIPOLYGON (((1 1, 0 0, 0 2, 1 1)), ((2 0, 1...
    1 POLYGON ((0 1, 2 0, 0 0, 0 1))
    2 LINESTRING (0 0, 1 1, 1 0)
    dtype: geometry
    """
    # Both options are keyword-only here and are forwarded by keyword.
    options = {"method": method, "keep_collapsed": keep_collapsed}
    return _delegate_to_geometry_column("make_valid", self, **options)
# def reverse(self):
# raise NotImplementedError("This method is not implemented yet.")
[docs]
def segmentize(self, max_segment_length):
    """Add vertices to line segments based on a maximum segment length.

    Returns a ``GeoSeries`` in which additional vertices have been added
    to every line segment of each input geometry so that segments are no
    longer than the provided maximum segment length. New vertices evenly
    subdivide each segment. Only linear components of input geometries
    are densified; other geometries are returned unmodified.

    Parameters
    ----------
    max_segment_length : float | array-like
        Additional vertices will be added so that all line segments are
        no longer than this value. Must be greater than 0.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString
    >>> s = GeoSeries(
    ...     [
    ...         LineString([(0, 0), (0, 10)]),
    ...         Polygon([(0, 0), (10, 0), (10, 10), (0, 10), (0, 0)]),
    ...     ],
    ... )
    >>> s
    0 LINESTRING (0 0, 0 10)
    1 POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))
    dtype: geometry

    >>> s.segmentize(max_segment_length=5)
    0 LINESTRING (0 0, 0 5, 0 10)
    1 POLYGON ((0 0, 5 0, 10 0, 10 5, 10 10, 5 10, 0...
    dtype: geometry
    """
    densified = _delegate_to_geometry_column(
        "segmentize", self, max_segment_length
    )
    return densified
# def transform(self, transformation, include_z=False):
# raise NotImplementedError("This method is not implemented yet.")
# def force_2d(self):
# raise NotImplementedError("This method is not implemented yet.")
# def force_3d(self, z=0):
# raise NotImplementedError("This method is not implemented yet.")
# def line_merge(self, directed=False):
# raise NotImplementedError("This method is not implemented yet.")
# @property
# def unary_union(self):
# raise NotImplementedError("This method is not implemented yet.")
[docs]
def union_all(self, method="unary", grid_size=None) -> BaseGeometry:
    """Return a geometry containing the union of all geometries.

    The result is the union of every geometry in the ``GeoSeries``.

    Sedona does not support the method or grid_size argument, so the user
    does not need to manually decide the algorithm being used.

    Parameters
    ----------
    method : str (default ``"unary"``)
        Not supported in Sedona.
    grid_size : float, default None
        Not supported in Sedona.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import box
    >>> s = GeoSeries([box(0, 0, 1, 1), box(0, 0, 2, 2)])
    >>> s
    0 POLYGON ((1 0, 1 1, 0 1, 0 0, 1 0))
    1 POLYGON ((2 0, 2 2, 0 2, 0 0, 2 0))
    dtype: geometry

    >>> s.union_all()
    <POLYGON ((0 1, 0 2, 2 2, 2 0, 1 0, 0 0, 0 1))>
    """
    # Both arguments are still forwarded positionally, in signature order.
    forwarded = (method, grid_size)
    return _delegate_to_geometry_column("union_all", self, *forwarded)
[docs]
def crosses(self, other, align=None) -> ps.Series:
    """Test, row by row, whether each geometry crosses `other`.

    Returns a ``Series`` of ``dtype('bool')`` whose value is ``True`` for
    each aligned geometry that crosses `other`.

    An object is said to cross `other` if its `interior` intersects the
    `interior` of the other but does not contain it, and the dimension of
    the intersection is less than the dimension of the one or the other.

    Note: Unlike Geopandas, Sedona's implementation always returns NULL
    when a GeometryCollection is involved.

    The operation works on a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : GeoSeries or geometric object
        The GeoSeries (elementwise) or geometric object to test if is
        crossed.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    Series (bool)

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         LineString([(0, 0), (2, 2)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(0, 1),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         LineString([(1, 0), (1, 3)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(1, 1),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 5),
    ... )

    >>> s
    0 POLYGON ((0 0, 2 2, 0 2, 0 0))
    1 LINESTRING (0 0, 2 2)
    2 LINESTRING (2 0, 0 2)
    3 POINT (0 1)
    dtype: geometry

    >>> s2
    1 LINESTRING (1 0, 1 3)
    2 LINESTRING (2 0, 0 2)
    3 POINT (1 1)
    4 POINT (0 1)
    dtype: geometry

    We can check if each geometry of GeoSeries crosses a single
    geometry:

    >>> line = LineString([(-1, 1), (3, 1)])
    >>> s.crosses(line)
    0 True
    1 True
    2 True
    3 False
    dtype: bool

    We can also check two GeoSeries against each other, row by row.
    The GeoSeries above have different indices. We can either align both
    GeoSeries based on index values and compare elements with the same
    index using ``align=True`` or ignore index and compare elements based
    on their matching order using ``align=False``:

    >>> s.crosses(s2, align=True)
    0 False
    1 True
    2 False
    3 False
    4 False
    dtype: bool

    >>> s.crosses(s2, align=False)
    0 True
    1 True
    2 False
    3 False
    dtype: bool

    Notice that a line does not cross a point that it contains.

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries ``crosses`` *any* element of the other one.

    See also
    --------
    GeoSeries.disjoint
    GeoSeries.intersects
    """
    crossing_flags = _delegate_to_geometry_column("crosses", self, other, align)
    return crossing_flags
[docs]
def intersects(self, other, align=None):
    """Test, row by row, whether each geometry intersects `other`.

    Returns a ``Series`` of ``dtype('bool')`` whose value is ``True`` for
    each aligned geometry that intersects `other`.

    An object is said to intersect `other` if its `boundary` and `interior`
    intersect in any way with those of the other.

    The operation works on a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : GeoSeries or geometric object
        The GeoSeries (elementwise) or geometric object to test if is
        intersected.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    Series (bool)

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         LineString([(0, 0), (2, 2)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(0, 1),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         LineString([(1, 0), (1, 3)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(1, 1),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 5),
    ... )

    >>> s
    0 POLYGON ((0 0, 2 2, 0 2, 0 0))
    1 LINESTRING (0 0, 2 2)
    2 LINESTRING (2 0, 0 2)
    3 POINT (0 1)
    dtype: geometry

    >>> s2
    1 LINESTRING (1 0, 1 3)
    2 LINESTRING (2 0, 0 2)
    3 POINT (1 1)
    4 POINT (0 1)
    dtype: geometry

    We can check if each geometry of GeoSeries intersects a single
    geometry:

    >>> line = LineString([(-1, 1), (3, 1)])
    >>> s.intersects(line)
    0 True
    1 True
    2 True
    3 True
    dtype: bool

    We can also check two GeoSeries against each other, row by row.
    The GeoSeries above have different indices. We can either align both
    GeoSeries based on index values and compare elements with the same
    index using ``align=True`` or ignore index and compare elements based
    on their matching order using ``align=False``:

    >>> s.intersects(s2, align=True)
    0 False
    1 True
    2 True
    3 False
    4 False
    dtype: bool

    >>> s.intersects(s2, align=False)
    0 True
    1 True
    2 True
    3 True
    dtype: bool

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries ``intersects`` *any* element of the other one.

    See also
    --------
    GeoSeries.disjoint
    GeoSeries.crosses
    GeoSeries.touches
    GeoSeries.intersection
    """
    intersect_flags = _delegate_to_geometry_column(
        "intersects", self, other, align
    )
    return intersect_flags
[docs]
def overlaps(self, other, align=None):
    """Test, row by row, whether each geometry overlaps `other`.

    Returns True for all aligned geometries that overlap other, else False.

    In the original Geopandas, geometries overlap if they have more than
    one but not all points in common, have the same dimension, and the
    intersection of the interiors of the geometries has the same dimension
    as the geometries themselves.

    However, in Sedona, we return True in the case where the geometries'
    points match.

    Note: Sedona's behavior may also differ from Geopandas for
    GeometryCollections.

    The operation works on a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : GeoSeries or geometric object
        The GeoSeries (elementwise) or geometric object to test if
        overlaps.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    Series (bool)

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, MultiPoint, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         LineString([(0, 0), (2, 2)]),
    ...         MultiPoint([(0, 0), (0, 1)]),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 0), (0, 2)]),
    ...         LineString([(0, 1), (1, 1)]),
    ...         LineString([(1, 1), (3, 3)]),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 5),
    ... )

    We can check if each geometry of GeoSeries overlaps a single
    geometry:

    >>> polygon = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    >>> s.overlaps(polygon)
    0 True
    1 True
    2 False
    3 False
    dtype: bool

    We can also check two GeoSeries against each other, row by row.
    The GeoSeries above have different indices. We align both GeoSeries
    based on index values and compare elements with the same index.

    >>> s.overlaps(s2)
    0 False
    1 True
    2 False
    3 False
    4 False
    dtype: bool

    >>> s.overlaps(s2, align=False)
    0 True
    1 False
    2 True
    3 False
    dtype: bool

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries ``overlaps`` *any* element of the other one.

    See also
    --------
    GeoSeries.crosses
    GeoSeries.intersects
    """
    overlap_flags = _delegate_to_geometry_column("overlaps", self, other, align)
    return overlap_flags
[docs]
def touches(self, other, align=None):
    """Test, row by row, whether each geometry touches `other`.

    Returns a ``Series`` of ``dtype('bool')`` whose value is ``True`` for
    each aligned geometry that touches `other`.

    An object is said to touch `other` if it has at least one point in
    common with `other` and its interior does not intersect with any part
    of the other. Overlapping features therefore do not touch.

    Note: Sedona's behavior may also differ from Geopandas for
    GeometryCollections.

    The operation works on a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : GeoSeries or geometric object
        The GeoSeries (elementwise) or geometric object to test if is
        touched.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    Series (bool)

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, MultiPoint, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         LineString([(0, 0), (2, 2)]),
    ...         MultiPoint([(0, 0), (0, 1)]),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (-2, 0), (0, -2)]),
    ...         LineString([(0, 1), (1, 1)]),
    ...         LineString([(1, 1), (3, 0)]),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 5),
    ... )

    >>> s
    0 POLYGON ((0 0, 2 2, 0 2, 0 0))
    1 POLYGON ((0 0, 2 2, 0 2, 0 0))
    2 LINESTRING (0 0, 2 2)
    3 MULTIPOINT ((0 0), (0 1))
    dtype: geometry

    >>> s2
    1 POLYGON ((0 0, -2 0, 0 -2, 0 0))
    2 LINESTRING (0 1, 1 1)
    3 LINESTRING (1 1, 3 0)
    4 POINT (0 1)
    dtype: geometry

    We can check if each geometry of GeoSeries touches a single
    geometry:

    >>> line = LineString([(0, 0), (-1, -2)])
    >>> s.touches(line)
    0 True
    1 True
    2 True
    3 True
    dtype: bool

    We can also check two GeoSeries against each other, row by row.
    The GeoSeries above have different indices. We can either align both
    GeoSeries based on index values and compare elements with the same
    index using ``align=True`` or ignore index and compare elements based
    on their matching order using ``align=False``:

    >>> s.touches(s2, align=True)
    0 False
    1 True
    2 True
    3 False
    4 False
    dtype: bool

    >>> s.touches(s2, align=False)
    0 True
    1 False
    2 True
    3 False
    dtype: bool

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries ``touches`` *any* element of the other one.

    See also
    --------
    GeoSeries.overlaps
    GeoSeries.intersects
    """
    touch_flags = _delegate_to_geometry_column("touches", self, other, align)
    return touch_flags
[docs]
def within(self, other, align=None):
    """Returns a ``Series`` of ``dtype('bool')`` that is ``True`` wherever
    the aligned geometry is within `other`.

    A geometry counts as within `other` when at least one of its points lies
    in the `interior` of `other` and none of its points lie in the `exterior`
    of `other`. If either geometry is empty, the result is ``False``.

    This is the inverse of `contains`: the expression
    ``a.within(b) == b.contains(a)`` always evaluates to ``True``.

    Note: Sedona's behavior may differ from Geopandas for
    GeometryCollections and for geometries that are equal.

    The operation works on a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : GeoSeries or geometric object
        The GeoSeries (elementwise) or geometric object to test if each
        geometry is within.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    Series (bool)

    Examples
    --------
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         Polygon([(0, 0), (1, 2), (0, 2)]),
    ...         LineString([(0, 0), (0, 2)]),
    ...         Point(0, 1),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(0, 0), (0, 2)]),
    ...         LineString([(0, 0), (0, 1)]),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 5),
    ... )

    >>> s
    0    POLYGON ((0 0, 2 2, 0 2, 0 0))
    1    POLYGON ((0 0, 1 2, 0 2, 0 0))
    2    LINESTRING (0 0, 0 2)
    3    POINT (0 1)
    dtype: geometry

    >>> s2
    1    POLYGON ((0 0, 1 1, 0 1, 0 0))
    2    LINESTRING (0 0, 0 2)
    3    LINESTRING (0 0, 0 1)
    4    POINT (0 1)
    dtype: geometry

    Each geometry of a GeoSeries can be tested against one single geometry:

    >>> polygon = Polygon([(0, 0), (2, 2), (0, 2)])
    >>> s.within(polygon)
    0     True
    1     True
    2    False
    3    False
    dtype: bool

    Two GeoSeries can also be tested against each other, row by row. The
    GeoSeries above have different indices. Either align both GeoSeries on
    their index values and compare elements sharing an index
    (``align=True``), or ignore the index and compare elements by position
    (``align=False``):

    >>> s2.within(s)
    0    False
    1    False
    2     True
    3    False
    4    False
    dtype: bool

    >>> s2.within(s, align=False)
    1     True
    2    False
    3     True
    4     True
    dtype: bool

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries is ``within`` any element of the other one.

    See also
    --------
    GeoSeries.contains
    """
    # The geometry column of this frame/series implements the predicate.
    is_within = _delegate_to_geometry_column("within", self, other, align)
    return is_within
[docs]
def covers(self, other, align=None):
    """
    Returns a ``Series`` of ``dtype('bool')`` that is ``True`` wherever the
    aligned geometry entirely covers `other`.

    An object A covers another object B when no point of B lies in the
    exterior of A. If either object is empty, the result is ``False``.

    Note: Sedona's implementation instead returns False for identical
    geometries. Sedona's behavior may also differ from Geopandas for
    GeometryCollections.

    The operation works on a 1-to-1 row-wise manner.

    See
    https://lin-ear-th-inking.blogspot.com/2007/06/subtleties-of-ogc-covers-spatial.html
    for reference.

    Parameters
    ----------
    other : Geoseries or geometric object
        The Geoseries (elementwise) or geometric object to check is being
        covered.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    Series (bool)

    Examples
    --------
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         LineString([(0, 0), (2, 2)]),
    ...         Point(0, 0),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0.5, 0.5), (1.5, 0.5), (1.5, 1.5), (0.5, 1.5)]),
    ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
    ...         LineString([(1, 1), (1.5, 1.5)]),
    ...         Point(0, 0),
    ...     ],
    ...     index=range(1, 5),
    ... )

    >>> s
    0    POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))
    1    POLYGON ((0 0, 2 2, 0 2, 0 0))
    2    LINESTRING (0 0, 2 2)
    3    POINT (0 0)
    dtype: geometry

    >>> s2
    1    POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, ...
    2    POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))
    3    LINESTRING (1 1, 1.5 1.5)
    4    POINT (0 0)
    dtype: geometry

    Each geometry of a GeoSeries can be tested against one single geometry:

    >>> poly = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
    >>> s.covers(poly)
    0     True
    1    False
    2    False
    3    False
    dtype: bool

    Two GeoSeries can also be tested against each other, row by row. The
    GeoSeries above have different indices. Either align both GeoSeries on
    their index values and compare elements sharing an index
    (``align=True``), or ignore the index and compare elements by position
    (``align=False``):

    >>> s.covers(s2, align=True)
    0    False
    1    False
    2    False
    3    False
    4    False
    dtype: bool

    >>> s.covers(s2, align=False)
    0     True
    1    False
    2     True
    3     True
    dtype: bool

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries ``covers`` any element of the other one.

    See also
    --------
    GeoSeries.covered_by
    GeoSeries.overlaps
    """
    # The geometry column of this frame/series implements the predicate.
    covering = _delegate_to_geometry_column("covers", self, other, align)
    return covering
[docs]
def covered_by(self, other, align=None):
    """
    Returns a ``Series`` of ``dtype('bool')`` that is ``True`` wherever the
    aligned geometry is entirely covered by `other`.

    An object A is covered by another object B when no point of A lies in
    the exterior of B (equivalently, B covers A).

    Note: Sedona's implementation instead returns False for identical
    geometries. Sedona's behavior may differ from Geopandas for
    GeometryCollections.

    The operation works on a 1-to-1 row-wise manner.

    See
    https://lin-ear-th-inking.blogspot.com/2007/06/subtleties-of-ogc-covers-spatial.html
    for reference.

    Parameters
    ----------
    other : Geoseries or geometric object
        The Geoseries (elementwise) or geometric object to test whether it
        covers each geometry.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    Series (bool)

    Examples
    --------
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0.5, 0.5), (1.5, 0.5), (1.5, 1.5), (0.5, 1.5)]),
    ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
    ...         LineString([(1, 1), (1.5, 1.5)]),
    ...         Point(0, 0),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         LineString([(0, 0), (2, 2)]),
    ...         Point(0, 0),
    ...     ],
    ...     index=range(1, 5),
    ... )

    >>> s
    0    POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, ...
    1    POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))
    2    LINESTRING (1 1, 1.5 1.5)
    3    POINT (0 0)
    dtype: geometry
    >>>

    >>> s2
    1    POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))
    2    POLYGON ((0 0, 2 2, 0 2, 0 0))
    3    LINESTRING (0 0, 2 2)
    4    POINT (0 0)
    dtype: geometry

    Each geometry of a GeoSeries can be tested against one single geometry:

    >>> poly = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
    >>> s.covered_by(poly)
    0    True
    1    True
    2    True
    3    True
    dtype: bool

    Two GeoSeries can also be tested against each other, row by row. The
    GeoSeries above have different indices. Either align both GeoSeries on
    their index values and compare elements sharing an index
    (``align=True``), or ignore the index and compare elements by position
    (``align=False``):

    >>> s.covered_by(s2, align=True)
    0    False
    1     True
    2     True
    3     True
    4    False
    dtype: bool

    >>> s.covered_by(s2, align=False)
    0     True
    1    False
    2     True
    3     True
    dtype: bool

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries is ``covered_by`` any element of the other one.

    See also
    --------
    GeoSeries.covers
    GeoSeries.overlaps
    """
    # The geometry column of this frame/series implements the predicate.
    is_covered = _delegate_to_geometry_column("covered_by", self, other, align)
    return is_covered
[docs]
def distance(self, other, align=None):
    """Returns a ``Series`` holding the distance from each geometry to the
    aligned `other`.

    The operation works on a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : Geoseries or geometric object
        The Geoseries (elementwise) or geometric object to find the
        distance to.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    Series (float)

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 0), (1, 1)]),
    ...         Polygon([(0, 0), (-1, 0), (-1, 1)]),
    ...         LineString([(1, 1), (0, 0)]),
    ...         Point(0, 0),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0.5, 0.5), (1.5, 0.5), (1.5, 1.5), (0.5, 1.5)]),
    ...         Point(3, 1),
    ...         LineString([(1, 0), (2, 0)]),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 5),
    ... )

    >>> s
    0    POLYGON ((0 0, 1 0, 1 1, 0 0))
    1    POLYGON ((0 0, -1 0, -1 1, 0 0))
    2    LINESTRING (1 1, 0 0)
    3    POINT (0 0)
    dtype: geometry

    >>> s2
    1    POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, ...
    2    POINT (3 1)
    3    LINESTRING (1 0, 2 0)
    4    POINT (0 1)
    dtype: geometry

    The distance from each geometry of a GeoSeries to one single geometry:

    >>> point = Point(-1, 0)
    >>> s.distance(point)
    0    1.0
    1    0.0
    2    1.0
    3    1.0
    dtype: float64

    Two GeoSeries can also be measured against each other, row by row. The
    GeoSeries above have different indices. Either align both GeoSeries on
    their index values and use elements sharing an index (``align=True``),
    or ignore the index and use elements by position (``align=False``):

    >>> s.distance(s2, align=True)
    0         NaN
    1    0.707107
    2    2.000000
    3    1.000000
    4         NaN
    dtype: float64

    >>> s.distance(s2, align=False)
    0    0.000000
    1    3.162278
    2    0.707107
    3    1.000000
    dtype: float64
    """
    # The geometry column of this frame/series computes the distances.
    distances = _delegate_to_geometry_column("distance", self, other, align)
    return distances
[docs]
def intersection(self, other, align=None):
    """Returns a ``GeoSeries`` holding, for each aligned geometry, the
    intersection of its points with `other`.

    The operation works on a 1-to-1 row-wise manner.

    Note: Unlike most functions, intersection may return results that are
    unordered with respect to the index. If this is important to you, you
    may call ``sort_index()`` on the result.

    Parameters
    ----------
    other : Geoseries or geometric object
        The Geoseries (elementwise) or geometric object to find the
        intersection with.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         LineString([(0, 0), (2, 2)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(0, 1),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(1, 0), (1, 3)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(1, 1),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 6),
    ... )

    >>> s
    0    POLYGON ((0 0, 2 2, 0 2, 0 0))
    1    POLYGON ((0 0, 2 2, 0 2, 0 0))
    2    LINESTRING (0 0, 2 2)
    3    LINESTRING (2 0, 0 2)
    4    POINT (0 1)
    dtype: geometry

    >>> s2
    1    POLYGON ((0 0, 1 1, 0 1, 0 0))
    2    LINESTRING (1 0, 1 3)
    3    LINESTRING (2 0, 0 2)
    4    POINT (1 1)
    5    POINT (0 1)
    dtype: geometry

    Each geometry can be intersected with one single shapely geometry:

    >>> s.intersection(Polygon([(0, 0), (1, 1), (0, 1)]))
    0    POLYGON ((0 0, 0 1, 1 1, 0 0))
    1    POLYGON ((0 0, 0 1, 1 1, 0 0))
    2    LINESTRING (0 0, 1 1)
    3    POINT (1 1)
    4    POINT (0 1)
    dtype: geometry

    Two GeoSeries can also be intersected with each other, row by row. The
    GeoSeries above have different indices. Either align both GeoSeries on
    their index values and use elements sharing an index (``align=True``),
    or ignore the index and use elements by position (``align=False``):

    >>> s.intersection(s2, align=True)
    0    None
    1    POLYGON ((0 0, 0 1, 1 1, 0 0))
    2    POINT (1 1)
    3    LINESTRING (2 0, 0 2)
    4    POINT EMPTY
    5    None
    dtype: geometry

    >>> s.intersection(s2, align=False)
    0    POLYGON ((0 0, 0 1, 1 1, 0 0))
    1    LINESTRING (1 1, 1 2)
    2    POINT (1 1)
    3    POINT (1 1)
    4    POINT (0 1)
    dtype: geometry

    See Also
    --------
    GeoSeries.difference
    GeoSeries.symmetric_difference
    GeoSeries.union
    """
    # The geometry column of this frame/series computes the intersections.
    shared = _delegate_to_geometry_column("intersection", self, other, align)
    return shared
[docs]
def snap(self, other, tolerance, align=None):
    """Snap the vertices and segments of the geometry to vertices of the reference.

    Vertices and segments of the input geometry are snapped to vertices of
    the reference geometry and a new geometry is returned; the input
    geometries are not modified. The result is the input geometry with its
    vertices and segments snapped. If no snapping occurs, the input geometry
    is returned unchanged. The tolerance controls where snapping is
    performed.

    Where possible, this operation tries to avoid creating invalid
    geometries; however, it does not guarantee that output geometries will
    be valid. It is the responsibility of the caller to check for and handle
    invalid geometries.

    Because too much snapping can result in invalid geometries being
    created, heuristics are used to determine the number and location of
    snapped vertices that are likely safe to snap. These heuristics may omit
    some potential snaps that are otherwise within the tolerance.

    Note: Sedona's result may differ slightly from geopandas's snap() result
    because of small differences between the underlying engines being used.

    The operation works in a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : GeoSeries or geometric object
        The Geoseries (elementwise) or geometric object to snap to.
    tolerance : float or array like
        Maximum distance between vertices that shall be snapped
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Point(0.5, 2.5),
    ...         LineString([(0.1, 0.1), (0.49, 0.51), (1.01, 0.89)]),
    ...         Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)]),
    ...     ],
    ... )
    >>> s
    0    POINT (0.5 2.5)
    1    LINESTRING (0.1 0.1, 0.49 0.51, 1.01 0.89)
    2    POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0))
    dtype: geometry

    >>> s2 = GeoSeries(
    ...     [
    ...         Point(0, 2),
    ...         LineString([(0, 0), (0.5, 0.5), (1.0, 1.0)]),
    ...         Point(8, 10),
    ...     ],
    ...     index=range(1, 4),
    ... )
    >>> s2
    1    POINT (0 2)
    2    LINESTRING (0 0, 0.5 0.5, 1 1)
    3    POINT (8 10)
    dtype: geometry

    Each geometry can be snapped to one single shapely geometry:

    >>> s.snap(Point(0, 2), tolerance=1)
    0    POINT (0 2)
    1    LINESTRING (0.1 0.1, 0.49 0.51, 1.01 0.89)
    2    POLYGON ((0 0, 0 2, 0 10, 10 10, 10 0, 0 0))
    dtype: geometry

    Two GeoSeries can also be snapped to each other, row by row. The
    GeoSeries above have different indices. Either align both GeoSeries on
    their index values and snap elements sharing an index (``align=True``),
    or ignore the index and snap elements by position (``align=False``):

    >>> s.snap(s2, tolerance=1, align=True)
    0    None
    1    LINESTRING (0.1 0.1, 0.49 0.51, 1.01 0.89)
    2    POLYGON ((0.5 0.5, 1 1, 0 10, 10 10, 10 0, 0.5...
    3    None
    dtype: geometry

    >>> s.snap(s2, tolerance=1, align=False)
    0    POINT (0 2)
    1    LINESTRING (0 0, 0.5 0.5, 1 1)
    2    POLYGON ((0 0, 0 10, 8 10, 10 10, 10 0, 0 0))
    dtype: geometry
    """
    # The geometry column of this frame/series performs the snapping.
    snapped = _delegate_to_geometry_column("snap", self, other, tolerance, align)
    return snapped
@property
def bounds(self) -> ps.DataFrame:
    """Returns a ``DataFrame`` whose columns ``minx``, ``miny``, ``maxx``
    and ``maxy`` hold the bounds of each geometry.

    See ``GeoSeries.total_bounds`` for the limits of the entire series.

    Examples
    --------
    >>> from shapely.geometry import Point, Polygon, LineString
    >>> d = {'geometry': [Point(2, 1), Polygon([(0, 0), (1, 1), (1, 0)]),
    ... LineString([(0, 1), (1, 2)])]}
    >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
    >>> gdf.bounds
       minx  miny  maxx  maxy
    0   2.0   1.0   2.0   1.0
    1   0.0   0.0   1.0   1.0
    2   0.0   1.0   1.0   2.0

    The bounds can be attached to the ``GeoDataFrame`` like this:

    >>> import pandas as pd
    >>> gdf = pd.concat([gdf, gdf.bounds], axis=1)
    >>> gdf
                             geometry  minx  miny  maxx  maxy
    0                     POINT (2 1)   2.0   1.0   2.0   1.0
    1  POLYGON ((0 0, 1 1, 1 0, 0 0))   0.0   0.0   1.0   1.0
    2           LINESTRING (0 1, 1 2)   0.0   1.0   1.0   2.0
    """
    # Read the attribute of the same name from the geometry column.
    per_geometry_bounds = _delegate_to_geometry_column("bounds", self)
    return per_geometry_bounds
@property
def total_bounds(self):
    """Returns a tuple of the ``minx``, ``miny``, ``maxx`` and ``maxy``
    values bounding the series as a whole.

    See ``GeoSeries.bounds`` for the bounds of the individual geometries
    contained in the series.

    Examples
    --------
    >>> from shapely.geometry import Point, Polygon, LineString
    >>> d = {'geometry': [Point(3, -1), Polygon([(0, 0), (1, 1), (1, 0)]),
    ... LineString([(0, 1), (1, 2)])]}
    >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
    >>> gdf.total_bounds
    array([ 0., -1.,  3.,  2.])
    """
    # Read the attribute of the same name from the geometry column.
    overall_bounds = _delegate_to_geometry_column("total_bounds", self)
    return overall_bounds
[docs]
def dwithin(self, other, distance, align=None):
    """Returns a ``Series`` of ``dtype('bool')`` that is ``True`` wherever
    the aligned geometry is within a set distance from ``other``.

    The operation works on a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : GeoSeries or geometric object
        The GeoSeries (elementwise) or geometric object to test the
        distance to.
    distance : float, np.array, pd.Series
        Distance(s) to test if each geometry is within. A scalar distance
        will be applied to all geometries. An array or Series will be
        applied elementwise. If np.array or pd.Series are used then it must
        have same length as the GeoSeries.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        If False, the order of elements is preserved.
        None defaults to True.

    Returns
    -------
    Series (bool)

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(0, 0), (0, 2)]),
    ...         LineString([(0, 0), (0, 1)]),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(0, 4),
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(1, 0), (4, 2), (2, 2)]),
    ...         Polygon([(2, 0), (3, 2), (2, 2)]),
    ...         LineString([(2, 0), (2, 2)]),
    ...         Point(1, 1),
    ...     ],
    ...     index=range(1, 5),
    ... )

    >>> s
    0    POLYGON ((0 0, 1 1, 0 1, 0 0))
    1    LINESTRING (0 0, 0 2)
    2    LINESTRING (0 0, 0 1)
    3    POINT (0 1)
    dtype: geometry

    >>> s2
    1    POLYGON ((1 0, 4 2, 2 2, 1 0))
    2    POLYGON ((2 0, 3 2, 2 2, 2 0))
    3    LINESTRING (2 0, 2 2)
    4    POINT (1 1)
    dtype: geometry

    Each geometry of a GeoSeries can be tested for being within a set
    distance of one single geometry:

    >>> point = Point(0, 1)
    >>> s2.dwithin(point, 1.8)
    1     True
    2    False
    3    False
    4     True
    dtype: bool

    Two GeoSeries can also be tested against each other, row by row. The
    GeoSeries above have different indices. Either align both GeoSeries on
    their index values and compare elements sharing an index
    (``align=True``), or ignore the index and compare elements by position
    (``align=False``):

    >>> s.dwithin(s2, distance=1, align=True)
    0    False
    1     True
    2    False
    3    False
    4    False
    dtype: bool

    >>> s.dwithin(s2, distance=1, align=False)
    0     True
    1    False
    2    False
    3     True
    dtype: bool

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries is within the set distance of *any* element of the
    other one.

    See also
    --------
    GeoSeries.within
    """
    # The geometry column of this frame/series implements the predicate.
    is_near = _delegate_to_geometry_column("dwithin", self, other, distance, align)
    return is_near
[docs]
def difference(self, other, align=None):
    """Returns a ``GeoSeries`` holding, for each aligned geometry, the
    points that are not in `other`.

    The operation works on a 1-to-1 row-wise manner.

    Unlike Geopandas, Sedona does not support this operation for
    GeometryCollections.

    Parameters
    ----------
    other : Geoseries or geometric object
        The Geoseries (elementwise) or geometric object to find the
        difference to.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         LineString([(0, 0), (2, 2)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(0, 1),
    ...     ],
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(1, 0), (1, 3)]),
    ...         LineString([(2, 0), (0, 2)]),
    ...         Point(1, 1),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 6),
    ... )

    >>> s
    0    POLYGON ((0 0, 2 2, 0 2, 0 0))
    1    POLYGON ((0 0, 2 2, 0 2, 0 0))
    2    LINESTRING (0 0, 2 2)
    3    LINESTRING (2 0, 0 2)
    4    POINT (0 1)
    dtype: geometry

    >>> s2
    1    POLYGON ((0 0, 1 1, 0 1, 0 0))
    2    LINESTRING (1 0, 1 3)
    3    LINESTRING (2 0, 0 2)
    4    POINT (1 1)
    5    POINT (0 1)
    dtype: geometry

    The difference of each geometry and one single shapely geometry:

    >>> point = Point(0, 1)
    >>> s2.difference(point)
    1    POLYGON ((0 0, 1 1, 0 1, 0 0))
    2    LINESTRING (1 0, 1 3)
    3    LINESTRING (2 0, 0 2)
    4    POINT (1 1)
    5    GEOMETRYCOLLECTION EMPTY
    dtype: geometry

    The difference can also be taken between two GeoSeries, row by row. The
    GeoSeries above have different indices. Either align both GeoSeries on
    their index values and use elements sharing an index (``align=True``),
    or ignore the index and use elements by position (``align=False``):

    >>> s.difference(s2, align=True)
    0    POLYGON ((0 0, 2 2, 0 2, 0 0))
    1    POLYGON ((0 0, 2 2, 0 2, 0 0))
    2    LINESTRING (0 0, 2 2)
    3    LINESTRING (2 0, 0 2)
    4    POINT (0 1)
    5    POINT (0 1)
    dtype: geometry

    >>> s.difference(s2, align=False)
    0    POLYGON ((0 0, 2 2, 0 2, 0 0))
    1    POLYGON ((0 0, 2 2, 0 2, 0 0))
    2    GEOMETRYCOLLECTION EMPTY
    3    LINESTRING (2 0, 0 2)
    4    GEOMETRYCOLLECTION EMPTY
    dtype: geometry

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries is different from *any* element of the other one.

    See also
    --------
    GeoSeries.intersection
    """
    # The geometry column of this frame/series computes the differences.
    remainder = _delegate_to_geometry_column("difference", self, other, align)
    return remainder
[docs]
def intersection_all(self):
    """Placeholder: always raises ``NotImplementedError``.

    Raises
    ------
    NotImplementedError
        Unconditionally; this method has no implementation here.
    """
    message = "This method is not implemented yet."
    raise NotImplementedError(message)
[docs]
def contains(self, other, align=None):
    """Returns a ``Series`` of ``dtype('bool')`` that is ``True`` wherever
    the aligned geometry contains `other`.

    An object contains `other` when at least one point of `other` lies in
    the interior of the object and no points of `other` lie in its exterior.
    (Therefore, any given polygon does not contain its own boundary - there
    is not any point that lies in the interior.)
    If either object is empty, the result is ``False``.

    This is the inverse of `within`: the expression
    ``a.contains(b) == b.within(a)`` always evaluates to ``True``.

    Note: Sedona's implementation instead returns False for identical
    geometries.

    The operation works on a 1-to-1 row-wise manner.

    Parameters
    ----------
    other : GeoSeries or geometric object
        The GeoSeries (elementwise) or geometric object to test if it
        is contained.
    align : bool | None (default None)
        If True, automatically aligns GeoSeries based on their indices.
        None defaults to True.
        If False, the order of elements is preserved.

    Returns
    -------
    Series (bool)

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon, LineString, Point
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (1, 1), (0, 1)]),
    ...         LineString([(0, 0), (0, 2)]),
    ...         LineString([(0, 0), (0, 1)]),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(0, 4),
    ... )
    >>> s2 = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (2, 2), (0, 2)]),
    ...         Polygon([(0, 0), (1, 2), (0, 2)]),
    ...         LineString([(0, 0), (0, 2)]),
    ...         Point(0, 1),
    ...     ],
    ...     index=range(1, 5),
    ... )

    >>> s
    0    POLYGON ((0 0, 1 1, 0 1, 0 0))
    1    LINESTRING (0 0, 0 2)
    2    LINESTRING (0 0, 0 1)
    3    POINT (0 1)
    dtype: geometry

    >>> s2
    1    POLYGON ((0 0, 2 2, 0 2, 0 0))
    2    POLYGON ((0 0, 1 2, 0 2, 0 0))
    3    LINESTRING (0 0, 0 2)
    4    POINT (0 1)
    dtype: geometry

    Each geometry of a GeoSeries can be tested against one single geometry:

    >>> point = Point(0, 1)
    >>> s.contains(point)
    0    False
    1     True
    2    False
    3     True
    dtype: bool

    Two GeoSeries can also be tested against each other, row by row. The
    GeoSeries above have different indices. Either align both GeoSeries on
    their index values and compare elements sharing an index
    (``align=True``), or ignore the index and compare elements by position
    (``align=False``):

    >>> s2.contains(s, align=True)
    0    False
    1    False
    2    False
    3     True
    4    False
    dtype: bool

    >>> s2.contains(s, align=False)
    1     True
    2    False
    3     True
    4     True
    dtype: bool

    Notes
    -----
    This method works in a row-wise manner. It does not check if an element
    of one GeoSeries ``contains`` any element of the other one.

    See also
    --------
    GeoSeries.contains_properly
    GeoSeries.within
    """
    # The geometry column of this frame/series implements the predicate.
    containing = _delegate_to_geometry_column("contains", self, other, align)
    return containing
[docs]
def contains_properly(self, other, align=None):
    """Placeholder: always raises ``NotImplementedError``.

    The `other` and `align` arguments are accepted but not used.

    Raises
    ------
    NotImplementedError
        Unconditionally; this method has no implementation here.
    """
    message = "This method is not implemented yet."
    raise NotImplementedError(message)
[docs]
def to_parquet(self, path, **kwargs):
    """Placeholder: always raises ``NotImplementedError``.

    The `path` argument and any keyword arguments are accepted but not
    used; nothing is written.

    Raises
    ------
    NotImplementedError
        Unconditionally; this method has no implementation here.
    """
    message = "This method is not implemented yet."
    raise NotImplementedError(message)
[docs]
def buffer(
    self,
    distance,
    resolution=16,
    cap_style="round",
    join_style="round",
    mitre_limit=5.0,
    single_sided=False,
    **kwargs,
):
    """
    Returns a GeoSeries in which every geometry is buffered by the given
    distance.

    Parameters
    ----------
    distance : float
        The distance to buffer by. Negative distances will create inward
        buffers.
    resolution : int, default 16
        The resolution of the buffer around each vertex. Specifies the
        number of linear segments in a quarter circle in the approximation
        of circular arcs.
    cap_style : str, default "round"
        The style of the buffer cap. One of 'round', 'flat', 'square'.
    join_style : str, default "round"
        The style of the buffer join. One of 'round', 'mitre', 'bevel'.
    mitre_limit : float, default 5.0
        The mitre limit ratio for joins when join_style='mitre'.
    single_sided : bool, default False
        Whether to create a single-sided buffer. In Sedona, True will
        default to left-sided buffer. However, 'right' may be specified to
        use a right-sided buffer.
    **kwargs
        Extra keyword arguments, forwarded unchanged to the geometry
        column's ``buffer``.

    Returns
    -------
    GeoSeries
        A new GeoSeries with buffered geometries.

    Examples
    --------
    >>> from shapely.geometry import Point
    >>> from sedona.spark.geopandas import GeoDataFrame
    >>>
    >>> data = {
    ...     'geometry': [Point(0, 0), Point(1, 1)],
    ...     'value': [1, 2]
    ... }
    >>> gdf = GeoDataFrame(data)
    >>> buffered = gdf.buffer(0.5)
    """
    # Forward the named options positionally, in signature order.
    buffer_options = (
        distance,
        resolution,
        cap_style,
        join_style,
        mitre_limit,
        single_sided,
    )
    return _delegate_to_geometry_column("buffer", self, *buffer_options, **kwargs)
[docs]
def simplify(self, tolerance=None, preserve_topology=True):
    """Returns a ``GeoSeries`` holding a simplified representation of each
    geometry.

    The algorithm (Douglas-Peucker) recursively splits the original line
    into smaller parts and connects the endpoints of these parts by a
    straight line. It then removes all points whose distance to the straight
    line is smaller than `tolerance`. It does not move any points and it
    always preserves the endpoints of the original line or polygon.
    See https://shapely.readthedocs.io/en/latest/manual.html#object.simplify
    for details

    Each geometry is simplified independently, without considering the
    topology of a potential polygonal coverage. To treat the ``GeoSeries``
    as a coverage and simplify its edges while preserving the coverage
    topology, see :meth:`simplify_coverage`.

    Parameters
    ----------
    tolerance : float
        All parts of a simplified geometry will be no more than
        `tolerance` distance from the original. It has the same units
        as the coordinate reference system of the GeoSeries.
        For example, using `tolerance=100` in a projected CRS with meters
        as units means a distance of 100 meters in reality.
    preserve_topology: bool (default True)
        False uses a quicker algorithm, but may produce self-intersecting
        or otherwise invalid geometries.

    Notes
    -----
    Invalid geometric objects may result from simplification that does not
    preserve topology and simplification may be sensitive to the order of
    coordinates: two geometries differing only in order of coordinates may
    be simplified differently.

    See also
    --------
    simplify_coverage : simplify geometries using coverage simplification

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Point, LineString
    >>> s = GeoSeries(
    ...     [Point(0, 0).buffer(1), LineString([(0, 0), (1, 10), (0, 20)])]
    ... )
    >>> s
    0    POLYGON ((1 0, 0.99518 -0.09802, 0.98079 -0.19...
    1    LINESTRING (0 0, 1 10, 0 20)
    dtype: geometry

    >>> s.simplify(1)
    0    POLYGON ((0 1, 0 -1, -1 0, 0 1))
    1    LINESTRING (0 0, 0 20)
    dtype: geometry
    """
    # The geometry column of this frame/series performs the simplification.
    simplified = _delegate_to_geometry_column(
        "simplify", self, tolerance, preserve_topology
    )
    return simplified
[docs]
@abstractmethod
def to_geopandas(self) -> Union[gpd.GeoSeries, gpd.GeoDataFrame]:
    """Abstract hook with no behaviour of its own.

    The return annotation is a geopandas ``GeoSeries`` or ``GeoDataFrame``;
    concrete subclasses are expected to supply the implementation.
    """
[docs]
@abstractmethod
def plot(self, *args, **kwargs):
    """Abstract hook with no behaviour of its own.

    Accepts arbitrary positional and keyword arguments; concrete subclasses
    are expected to supply the implementation.
    """
def _delegate_to_geometry_column(op, this, *args, **kwargs):
geom_column = this.geometry
inplace = kwargs.pop("inplace", False)
if args or kwargs:
data = getattr(geom_column, op)(*args, **kwargs)
else:
data = getattr(geom_column, op)
# If it was a function instead of a property, call it
if callable(data):
data = data()
if inplace:
# This assumes this is a GeoSeries
this._update_inplace(geom_column)
return None
return data