Update C# Pages in view of the new preferred inference API #17642

Merged · 24 commits · Sep 26, 2023
168 changes: 128 additions & 40 deletions docs/get-started/with-csharp.md
@@ -16,7 +16,7 @@ nav_order: 4
## Install the NuGet Packages with the .NET CLI

```bash
dotnet add package Microsoft.ML.OnnxRuntime --version 1.16.0
dotnet add package System.Numerics.Tensors --version 0.1.0
```

@@ -42,28 +42,82 @@ This is an [Azure Function](https://azure.microsoft.com/services/functions/) example

```cs
string requestBody = await new StreamReader(req.Body).ReadToEndAsync();
dynamic data = JsonConvert.DeserializeObject(requestBody);
review ??= data.review;
Debug.Assert(!string.IsNullOrEmpty(review), "Expecting a string with content");

// Get path to model to create inference session.
const string modelPath = "./model.onnx";

// Create an InferenceSession from the Model Path.
// Creating and loading a session is expensive per request, so it is better to cache it.
using var session = new InferenceSession(modelPath);

// Create the input tensor (NLP example).
using var inputOrtValue = OrtValue.CreateTensorWithEmptyStrings(OrtAllocator.DefaultInstance, new long[] { 1, 1 });
inputOrtValue.StringTensorSetElementAt(review, 0);

// Create input data for the session. Request all outputs in this case.
var inputs = new Dictionary<string, OrtValue>
{
    { "input", inputOrtValue }
};

using var runOptions = new RunOptions();

// The result is a sequence of maps; we only need the first map from there.
using var outputs = session.Run(runOptions, inputs, session.OutputNames);
Debug.Assert(outputs.Count > 0, "Expecting some output");

// We want the last output, which is the sequence of maps.
var lastOutput = outputs[outputs.Count - 1];

// Optional code to check the output type.
{
    var outputTypeInfo = lastOutput.GetTypeInfo();
    Debug.Assert(outputTypeInfo.OnnxType == OnnxValueType.ONNX_TYPE_SEQUENCE, "Expecting a sequence");

    var sequenceTypeInfo = outputTypeInfo.SequenceTypeInfo;
    Debug.Assert(sequenceTypeInfo.ElementType.OnnxType == OnnxValueType.ONNX_TYPE_MAP, "Expecting a sequence of maps");
}

var elementsNum = lastOutput.GetValueCount();
Debug.Assert(elementsNum > 0, "Expecting a non-empty sequence");

// Get the first map in the sequence.
using var firstMap = lastOutput.GetValue(0, OrtAllocator.DefaultInstance);

// Optional code, just checking.
{
    // Maps always have two elements: keys and values.
    // We are expecting this to be a map of strings to floats.
    var mapTypeInfo = firstMap.GetTypeInfo().MapTypeInfo;
    Debug.Assert(mapTypeInfo.KeyType == TensorElementType.String, "Expecting keys to be strings");
    Debug.Assert(mapTypeInfo.ValueType.OnnxType == OnnxValueType.ONNX_TYPE_TENSOR, "Values are in the tensor");
    Debug.Assert(mapTypeInfo.ValueType.TensorTypeAndShapeInfo.ElementDataType == TensorElementType.Float, "Result map value is float");
}

var inferenceResult = new Dictionary<string, float>();

// Use the visitor to read map keys and values.
// Keys and values are represented with the same number of corresponding entries (string -> float).
firstMap.ProcessMap((keys, values) =>
{
    // Access the native buffer directly.
    var valuesSpan = values.GetTensorDataAsSpan<float>();

    var entryCount = (int)keys.GetTensorTypeAndShape().ElementCount;
    inferenceResult.EnsureCapacity(entryCount);
    for (int i = 0; i < entryCount; ++i)
    {
        inferenceResult.Add(keys.GetStringElement(i), valuesSpan[i]);
    }
}, OrtAllocator.DefaultInstance);

// Return the inference result as json.
return new JsonResult(inferenceResult);

}
```
## Reuse input/output tensor buffers
@@ -73,46 +73,127 @@ In some scenarios, you may want to reuse input/output tensors. This often happens when chaining two models (feeding one model's output as input to another), or when running multiple inferences with fixed-sized inputs and outputs.
### Chaining: Feed model A's output(s) as input(s) to model B

```cs
using Microsoft.ML.OnnxRuntime.Tensors;
using Microsoft.ML.OnnxRuntime;
using System.Collections.Generic;
using System.Linq;

namespace Samples
{
    class FeedModelAToModelB
    {
        static void Program()
        {
            const string modelAPath = "./modelA.onnx";
            const string modelBPath = "./modelB.onnx";
            using InferenceSession session1 = new InferenceSession(modelAPath);
            using InferenceSession session2 = new InferenceSession(modelBPath);

            // Illustration only
            float[] inputData = { 1, 2, 3, 4 };
            long[] inputShape = { 1, 4 };

            using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(inputData, inputShape);

            // Create input data for session1. Request all outputs in this case.
            var inputs1 = new Dictionary<string, OrtValue>
            {
                { "input", inputOrtValue }
            };

            using var runOptions = new RunOptions();

            // session1 inference
            using (var outputs1 = session1.Run(runOptions, inputs1, session1.OutputNames))
            {
                // Get the intermediate value
                var outputToFeed = outputs1.First();

                // Create the input for session2, feeding model A's output under the name model B expects
                var inputs2 = new Dictionary<string, OrtValue>
                {
                    { "inputNameForModelB", outputToFeed }
                };

                // session2 inference
                using (var results = session2.Run(runOptions, inputs2, session2.OutputNames))
                {
                    // manipulate the results
                }
            }
        }
    }
}

```
### Multiple inference runs with fixed sized input(s) and output(s)

If the model has fixed-sized inputs and outputs of numeric tensors,
use the **OrtValue** API to accelerate inference and minimize data transfer.
The **OrtValue** class makes it possible to reuse the underlying buffer for the input and output tensors.
It pins the managed buffers and makes use of them for inference. It also provides direct access
to the native buffers for outputs. You can also pre-allocate `OrtValue`s for outputs or create them on top
of existing buffers.
This avoids some overhead, which may be beneficial for smaller models
where this time is noticeable in the overall running time.

<!-- FIXME!: This test is no longer in the repo. Needs to be fixed. -->
<!-- An example can be found at `TestReusingFixedBufferOnnxValueNonStringTypeMultiInferences()`:
* [Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs#L1047](https://github.com/microsoft/onnxruntime/blob/main/csharp/test/Microsoft.ML.OnnxRuntime.Tests.Common/InferenceTest.cs#L1047) -->
Keep in mind that the **OrtValue** class, like many other classes in the Onnxruntime C# API, is **IDisposable**.
It needs to be properly disposed of to either unpin the managed buffers or release the native buffers,
otherwise memory is leaked.
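
For example, here is a minimal sketch of reusing pinned managed buffers across multiple `Run` calls. The model path, the input/output names, and the shapes are hypothetical, and the `Run` overload that writes into pre-allocated output `OrtValue`s is assumed; check the `InferenceSession` reference for the exact signature in your version.

```cs
// Minimal sketch only: names, shapes and the exact Run overload are assumptions.
using System;
using Microsoft.ML.OnnxRuntime;

float[] inputBuffer = new float[4];   // reused for every inference
float[] outputBuffer = new float[2];  // reused for every inference

using var session = new InferenceSession("model.onnx");
using var runOptions = new RunOptions();

// The OrtValues pin the managed arrays once; no per-call allocation.
using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(inputBuffer, new long[] { 1, 4 });
using var outputOrtValue = OrtValue.CreateTensorValueFromMemory(outputBuffer, new long[] { 1, 2 });

string[] inputNames = { "input" };    // hypothetical tensor names
string[] outputNames = { "output" };

for (int i = 0; i < 10; ++i)
{
    // Write fresh input data directly into the pinned buffer.
    for (int j = 0; j < inputBuffer.Length; ++j)
    {
        inputBuffer[j] = i + j;
    }

    // Assumed overload: results are written into the pre-allocated output OrtValue,
    // so outputBuffer holds the result after the call returns.
    session.Run(runOptions, inputNames, new[] { inputOrtValue }, outputNames, new[] { outputOrtValue });

    Console.WriteLine(string.Join(", ", outputBuffer));
}
```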

## Running on GPU (Optional)
If using the GPU package, simply use the appropriate SessionOptions when creating an InferenceSession.

```cs
int gpuDeviceId = 0; // The GPU device ID to execute on
using var gpuSessionOptions = SessionOptions.MakeSessionOptionWithCudaProvider(gpuDeviceId);
using var session = new InferenceSession("model.onnx", gpuSessionOptions);
```
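
Equivalently, if you also want to set other session options, you can append the CUDA execution provider to an explicitly created `SessionOptions`. A sketch, assuming the GPU package is installed and `gpuDeviceId` is the variable from the snippet above:

```cs
// Sketch: same effect via SessionOptions.AppendExecutionProvider_CUDA.
using var options = new SessionOptions();
options.AppendExecutionProvider_CUDA(gpuDeviceId);
using var session = new InferenceSession("model.onnx", options);
```
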
# ONNX Runtime C# API
{: .no_toc }
150 changes: 132 additions & 18 deletions docs/tutorials/csharp/basic_csharp.md
@@ -1,36 +1,150 @@
---
nav_exclude: true
title: Basic C# Tutorial
description: Basic usage of C# API
parent: Inference with C#
grand_parent: Tutorials
has_children: false
nav_order: 1
---


# C# Tutorial: Basic

Here is a simple tutorial for getting started with running inference on an existing ONNX model for given input data.
The model is typically trained using any of the well-known training frameworks and then exported into the ONNX format.

Note that the following classes, `NamedOnnxValue`, `DisposableNamedOnnxValue`, and `FixedBufferOnnxValue`, are going
to be deprecated in the future. They are not recommended for new code.

The new `OrtValue` based API is the recommended approach. The `OrtValue` API generates less garbage and is more performant.
Some scenarios have shown a 4x performance improvement over the previous API, with significantly less garbage.
It provides uniform access to data via `ReadOnlySpan<T>` and `Span<T>` structures, regardless of whether the data is managed or unmanaged.

The `DenseTensor` class can still be used for multi-dimensional access to the data, since the new `Span` based APIs feature
only a 1-D index. However, slow performance has been reported for multi-dimensional access through the `DenseTensor` class.
One can then create an `OrtValue` on top of the tensor's data.

The `ShapeUtils` class provides some help for dealing with multi-dimensional indices for `OrtValue`s.
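
For illustration, here is a minimal sketch of multi-dimensional style access over the flat span of a tensor `OrtValue` using plain row-major index arithmetic; `ortValue` and its shape are assumed, and the `FlatIndex` helper is our own, not part of the API.

```cs
// Sketch: row-major access into the 1-D span of a float tensor OrtValue.
// Assumes ortValue holds a float tensor of shape [rows, cols].
long[] shape = { 2, 3 };
var data = ortValue.GetTensorDataAsSpan<float>();

// Our own helper: flat index of element (row, col) for a [rows, cols] shape.
static int FlatIndex(long[] shape, int row, int col) => row * (int)shape[1] + col;

float element = data[FlatIndex(shape, 1, 2)]; // element at row 1, column 2
```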

The `OrtValue` based API provides direct native memory access in a type-safe manner using `ReadOnlySpan<T>` and `Span<T>` stack-based structures.
`OrtValue` is a universal container that can hold different ONNX types, such as tensors, maps, and sequences.
It has always existed in the onnxruntime library, but was not previously exposed in the C# API.

As before, `OrtValue`s can be created directly on top of managed arrays of `unmanaged` (struct-based, blittable) types;
see the Microsoft documentation on blittable data types. The onnxruntime C# API allows the use of managed buffers for input or output.

If output shapes are known, one can pre-allocate `OrtValue`s on top of managed or unmanaged allocations and supply
those `OrtValue`s to be used as outputs. Because of this, the need for `IOBinding` is greatly diminished.

String data is represented as UTF-16 string objects in C#. It still needs to be copied and converted to UTF-8 in native
memory. However, that conversion is now more optimized and is done in a single pass without intermediate byte arrays.
The same applies to string `OrtValue` tensors returned as outputs. The character-based API now operates on `Span<char>`,
`ReadOnlySpan<char>`, and `ReadOnlyMemory<char>` objects. This adds flexibility to the API and helps avoid unnecessary copies.

Except for some of the deprecated API classes mentioned above, nearly all of the C# API classes are `IDisposable`,
meaning they need to be disposed of after use, otherwise you will get memory leaks.
Because `OrtValue`s are used to hold tensor data, the leaks can be large. They are likely
to accumulate with each `Run` call, as each inference call requires input `OrtValue`s and returns output `OrtValue`s.
Do not rely on finalizers: they are not guaranteed to ever run, and when they do, it is already too late.

This includes `SessionOptions`, `RunOptions`, `InferenceSession`, and `OrtValue`. `Run()` calls return an `IDisposableCollection`
that allows all of the contained objects to be disposed in one statement or `using` block. This is because these objects
own native resources, often a native object.

Not disposing an `OrtValue` that was created on top of a managed buffer would result in
that buffer being pinned in memory indefinitely. Such a buffer cannot be garbage collected or moved in memory.

`OrtValue`s that were created on top of native onnxruntime memory should also be disposed of promptly.
Otherwise, the native memory will not be deallocated. `OrtValue`s returned by `Run()` usually hold native memory.

The GC cannot operate on native memory or any other native resources.

The `using` statement or block is a convenient way to ensure that objects are disposed.
`InferenceSession` can be a long-lived object and a member of another class. It eventually needs to be disposed as well,
which means the containing class also has to be made disposable to achieve this.

The `OrtValue` API also provides a visitor-like API to walk ONNX maps and sequences.
This is a more efficient way to access Onnxruntime data.
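
For example, here is a minimal sketch of reading a string-to-float map output with the visitor API, assuming `mapOrtValue` is an `OrtValue` holding such a map (as in the Azure Function example earlier in this PR):

```cs
// Sketch: walk a map OrtValue with the visitor API.
// Assumes mapOrtValue holds a map with string keys and float values.
var result = new Dictionary<string, float>();
mapOrtValue.ProcessMap((keys, values) =>
{
    // Keys and values are exposed as tensors with the same number of entries.
    var valuesSpan = values.GetTensorDataAsSpan<float>();
    var entryCount = (int)keys.GetTensorTypeAndShape().ElementCount;
    for (int i = 0; i < entryCount; ++i)
    {
        result.Add(keys.GetStringElement(i), valuesSpan[i]);
    }
}, OrtAllocator.DefaultInstance);
```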

To start scoring using the model, open a session using the `InferenceSession` class, passing in the file path to the model as a parameter.

```cs
using var session = new InferenceSession("model.onnx");
```

Once a session is created, you can execute queries using the `Run` method of the `InferenceSession` object.

```cs

float[] sourceData; // assume your data is loaded into a flat float array
long[] dimensions; // and the dimensions of the input is stored here

// Create an OrtValue on top of the sourceData array
using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(sourceData, dimensions);

var inputs = new Dictionary<string, OrtValue>
{
    { "name1", inputOrtValue }
};


using var runOptions = new RunOptions();

// Pass the inputs and request the outputs by name.
// Note that the output is a disposable collection that holds OrtValues.
using var output = session.Run(runOptions, inputs, session.OutputNames);

var output_0 = output[0];

// Assuming the output contains a tensor of float data, you can access it as follows
// Returns Span<float> which points directly to native memory.
var outputData = output_0.GetTensorDataAsSpan<float>();

// If you are interested in more information about output, request its type and shape
// Assuming it is a tensor
// This is not disposable, will be GCed
// There you can request Shape, ElementDataType, etc
var tensorTypeAndShape = output_0.GetTensorTypeAndShape();

```
You can still use the `Tensor` class for data manipulation if you have existing code that does it.
You can then create an `OrtValue` on top of the tensor's buffer.

```cs

// Create and manipulate the data using tensor interface
DenseTensor<float> t1 = new DenseTensor<float>(sourceData, dimensions);

// One minor inconvenience is that Tensor class operates on `int` dimensions and indices.
// OrtValue dimensions are `long`. This is required, because `OrtValue` talks directly to
// Ort API and the library uses long dimensions.

// Convert dims to long[]
var shape = Array.ConvertAll<int, long>(dimensions, Convert.ToInt64);

using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(OrtMemoryInfo.DefaultInstance,
    t1.Buffer, shape);

```

Here is a way to populate a string tensor. Strings cannot be mapped directly, and must be copied and converted
to native memory. To that end, we pre-allocate a native tensor of empty strings with the specified dimensions, and then
set individual strings by index.


```cs
string[] strs = { "Hello", "Ort", "World" };
long[] shape = { 1, 1, 3 };
var elementsNum = ShapeUtils.GetSizeForShape(shape);

using var strTensor = OrtValue.CreateTensorWithEmptyStrings(OrtAllocator.DefaultInstance, shape);

for (long i = 0; i < elementsNum; ++i)
{
    strTensor.StringTensorSetElementAt(strs[i].AsSpan(), i);
}

```
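
Reading strings back out of a string tensor is symmetrical; a minimal sketch, assuming the `strTensor` and `elementsNum` from the snippet above:

```cs
// Sketch: read each string element back from the native tensor by index.
for (int i = 0; i < (int)elementsNum; ++i)
{
    string s = strTensor.GetStringElement(i);
    Console.WriteLine(s);
}
```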

You can load your input data into `Tensor<T>` objects in several ways. A simple example is to create the Tensor from arrays.

```cs
float[] sourceData; // assume your data is loaded into a flat float array
int[] dimensions; // and the dimensions of the input is stored here
Tensor<float> t1 = new DenseTensor<float>(sourceData, dimensions);
```