SNOW-583979: Improve UDF and table merge docstring (#595)

snowflakedb · Nov 1, 2022 · c61dbae · c61dbae
1 parent 3b7381e
commit c61dbae
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 7 deletions.
diff --git a/src/snowflake/snowpark/stored_procedure.py b/src/snowflake/snowpark/stored_procedure.py
@@ -108,7 +108,10 @@ class StoredProcedureRegistration:
           function on the Snowflake server during stored procedure creation. During the serialization, the
           global variables used in the Python function will be serialized into the bytecode,
           but only the name of the module object or any objects from a module that are used in the
-          Python function will be serialized. During the deserialization, Python will look up the
+          Python function will be serialized. If the size of the serialized bytecode is over 8K bytes, it will be uploaded to a stage location as a Python file.
+          If it's under 8K, it will be added to the `Stored Procedure in-line code <https://docs.snowflake.com/en/sql-reference/stored-procedures-python.html#choosing-to-create-a-stored-procedure-with-in-line-code-or-with-code-uploaded-from-a-stage>`__.
+
+          During the deserialization, Python will look up the
           corresponding modules and objects by names.
 
           Details can be found in :class:`snowflake.snowpark.udf.UDFRegistration`.

diff --git a/src/snowflake/snowpark/table.py b/src/snowflake/snowpark/table.py
@@ -82,15 +82,15 @@ def update(self, assignments: Dict[str, ColumnOrLiteral]) -> "WhenMatchedClause"
             >>> # if its key is equal to the key of any row in target.
             >>> # For all such rows, update its value to the value of the
             >>> # corresponding row in source.
-            >>> from snowflake.snowpark.functions import when_matched
+            >>> from snowflake.snowpark.functions import when_matched, lit
             >>> target_df = session.create_dataframe([(10, "old"), (10, "too_old"), (11, "old")], schema=["key", "value"])
             >>> target_df.write.save_as_table("my_table", mode="overwrite", table_type="temporary")
             >>> target = session.table("my_table")
             >>> source = session.create_dataframe([(10, "new")], schema=["key", "value"])
-            >>> target.merge(source, target["key"] == source["key"], [when_matched().update({"value": source["value"]})])
-            MergeResult(rows_inserted=0, rows_updated=2, rows_deleted=0)
+            >>> target.merge(source, (target["key"] == source["key"]) & (target["value"] == lit("too_old")), [when_matched().update({"value": source["value"]})])
+            MergeResult(rows_inserted=0, rows_updated=1, rows_deleted=0)
             >>> target.collect() # the value in the table is updated
-            [Row(KEY=10, VALUE='new'), Row(KEY=10, VALUE='new'), Row(KEY=11, VALUE='old')]
+            [Row(KEY=10, VALUE='old'), Row(KEY=10, VALUE='new'), Row(KEY=11, VALUE='old')]
 
         Note:
             An exception will be raised if this method or :meth:`WhenMatchedClause.delete`

diff --git a/src/snowflake/snowpark/udf.py b/src/snowflake/snowpark/udf.py
@@ -117,6 +117,16 @@ class UDFRegistration:
     permanently. The methods that register a UDF return a :class:`UserDefinedFunction` object,
     which you can also use in :class:`~snowflake.snowpark.Column` expressions.
 
+    Note:
+        Before creating a UDF, think about whether you want to create a vectorized UDF (also referred to as `Python UDF Batch API`) or a regular UDF.
+        The advantages of a vectorized UDF are:
+
+          - The potential for better performance if your Python code operates efficiently on batches of rows.
+          - Less transformation logic is required if you are calling into libraries that operate on Pandas DataFrames or Pandas arrays.
+
+        Refer to `Python UDF Batch API <https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-batch.html>`__ for more details.
+        The following text explains how to create a regular UDF and a vectorized UDF by using the Snowpark Python APIs.
+
     There are two ways to register a UDF with Snowpark:
 
         - Use :func:`~snowflake.snowpark.functions.udf` or :meth:`register`. By pointing to a
@@ -125,8 +135,9 @@ class UDFRegistration:
           function on the Snowflake server during UDF creation. During the serialization, the
           global variables used in the Python function will be serialized into the bytecode,
           but only the name of the module object or any objects from a module that are used in the
-          Python function will be serialized. During the deserialization, Python will look up the
-          corresponding modules and objects by names. For example::
+          Python function will be serialized. If the size of the serialized bytecode is over 8K bytes, it will be uploaded to a stage location as a Python file.
+          If it's under 8K, it will be added to the `UDF in-line code <https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-creating.html#udfs-with-in-line-code-vs-udfs-with-code-uploaded-from-a-stage>`__.
+          During the deserialization, Python will look up the corresponding modules and objects by names. For example::
 
                 >>> import numpy
                 >>> from resources.test_udf_dir.test_udf_file import mod5