From e1dece2f2c30900446e93d40b9cfdf6b525e416c Mon Sep 17 00:00:00 2001
From: Qiusheng Wu <giswqs@gmail.com>
Date: Sat, 11 Nov 2023 01:02:43 -0500
Subject: [PATCH] Add vector_to_parquet function (#598)

* Add vector_to_parquet function

* Fix gdb_to_vector bug

* Improve vector_to_parquet

* Improve file handling
---
 leafmap/common.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/leafmap/common.py b/leafmap/common.py
index ea8cb15dc0..f2170049b5 100644
--- a/leafmap/common.py
+++ b/leafmap/common.py
@@ -11601,8 +11601,12 @@ def gdb_to_vector(
     gdb_path: str,
     out_dir: str,
     layers: Optional[List[str]] = None,
+    filenames: Optional[List[str]] = None,
     gdal_driver: str = "GPKG",
     file_extension: Optional[str] = None,
+    overwrite: bool = False,
+    quiet=False,
+    **kwargs,
 ):
     """Converts layers from a File Geodatabase (GDB) to a vector format.
 
@@ -11610,8 +11614,11 @@ def gdb_to_vector(
         gdb_path (str): The path to the File Geodatabase (GDB).
         out_dir (str): The output directory to save the converted files.
         layers (Optional[List[str]]): A list of layer names to convert. If None, all layers will be converted. Default is None.
+        filenames (Optional[List[str]]): A list of output file names. If None, the layer names will be used as the file names. Default is None.
         gdal_driver (str): The GDAL driver name for the output vector format. Default is "GPKG".
         file_extension (Optional[str]): The file extension for the output files. If None, it will be determined automatically based on the gdal_driver. Default is None.
+        overwrite (bool): Whether to overwrite the existing output files. Default is False.
+        quiet (bool): If True, suppress the log output. Defaults to False.
 
     Returns:
         None
@@ -11628,6 +11635,17 @@ def gdb_to_vector(
     if isinstance(layers, str):
         layers = [layers]
 
+    if isinstance(filenames, str):
+        filenames = [filenames]
+
+    if filenames is not None:
+        if len(filenames) != len(layers):
+            raise ValueError("The length of filenames must match the length of layers.")
+
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
+
+    ii = 0
     # Iterate over the layers
     for i in range(layer_count):
         layer = gdb_dataset.GetLayerByIndex(i)
@@ -11641,7 +11659,20 @@ def gdb_to_vector(
             file_extension = get_gdal_file_extension(gdal_driver)
 
         # Create the output file path
-        output_file = os.path.join(out_dir, feature_class_name + "." + file_extension)
+        if filenames is not None:
+            output_file = os.path.join(out_dir, filenames[ii] + "." + file_extension)
+            ii += 1
+        else:
+            output_file = os.path.join(
+                out_dir, feature_class_name + "." + file_extension
+            )
+
+        if os.path.exists(output_file) and not overwrite:
+            print(f"File {output_file} already exists. Skipping...")
+            continue
+        else:
+            if not quiet:
+                print(f"Converting layer {feature_class_name} to {output_file}...")
 
         # Create the output driver
         output_driver = ogr.GetDriverByName(gdal_driver)
@@ -11686,6 +11717,45 @@ def gdb_layer_names(gdb_path: str) -> List[str]:
     return layer_names
 
 
+def vector_to_parquet(
+    source: str, output: str, crs=None, overwrite=False, **kwargs
+) -> None:
+    """Write a GeoDataFrame or a file containing vector data to a Parquet file.
+
+    Args:
+        source (Union[gpd.GeoDataFrame, str]): The source data to convert. It can be either a GeoDataFrame
+            (used as is) or a file path to the vector data file (read with `gpd.read_file`).
+        output (str): The file path where the Parquet file will be saved. Missing parent
+            directories are created.
+        crs (str, optional): The coordinate reference system (CRS) to reproject the data to (via `to_crs`)
+            before writing. Defaults to None, in which case no reprojection is done.
+        overwrite (bool): Whether to overwrite the existing output file. If False and the file exists, a message is printed and nothing is written. Default is False.
+        **kwargs: Additional keyword arguments to be passed to the `to_parquet` function of GeoDataFrame.
+
+    Returns:
+        None
+    """
+
+    import geopandas as gpd
+
+    if os.path.exists(output) and not overwrite:
+        print(f"File {output} already exists. Skipping...")
+        return
+
+    if isinstance(source, gpd.GeoDataFrame):
+        gdf = source
+    else:
+        gdf = gpd.read_file(source)
+
+    if crs is not None:
+        gdf = gdf.to_crs(crs)
+
+    # exist_ok=True avoids the check-then-create race of a separate os.path.exists test.
+    os.makedirs(os.path.dirname(os.path.abspath(output)), exist_ok=True)
+
+    gdf.to_parquet(output, **kwargs)
+
+
 def df_to_gdf(
     df, geometry="geometry", src_crs="EPSG:4326", dst_crs="EPSG:4326", **kwargs
 ):