class ExtensionService(SSE.ConnectorServicer):
    """
    SSE plugin servicer offering full script evaluation backed by Pandas.
    """

    def __init__(self):
        """
        Create the script evaluator and switch on logging.

        A 'logs' directory is created (if missing) relative to the current working
        directory, and the logging setup is read from the 'logger.config' file that
        sits next to this module.
        """
        self.ScriptEval = ScriptEval()
        os.makedirs('logs', exist_ok=True)
        module_dir = os.path.dirname(os.path.abspath(__file__))
        logging.config.fileConfig(os.path.join(module_dir, 'logger.config'))
        logging.info('Logging enabled')

    # ------------------------------------------------------------------
    # Implementation of rpc functions.
    # ------------------------------------------------------------------

    def GetCapabilities(self, request, context):
        """
        Report the capabilities of this plugin.

        Neither request nor context is used here; both are still declared because
        gRPC always passes them when invoking an rpc method.
        :param request: the request, not used in this method.
        :param context: the context, not used in this method.
        :return: a Capabilities message with script evaluation enabled.
        """
        logging.info('GetCapabilities')
        # Script evaluation is enabled; identifier and version describe this plugin
        return SSE.Capabilities(
            allowScript=True,
            pluginIdentifier='Full Script Support using Pandas- Qlik',
            pluginVersion='v1.0.0',
        )

    def EvaluateScript(self, request, context):
        """
        Evaluate a script request by delegating to the ScriptEval instance.

        The script request header is read from the invocation metadata entry
        'qlik-scriptrequestheader-bin' and parsed before delegating.
        :param request: an iterable sequence of bundled rows.
        :param context: the request context, source of the invocation metadata.
        :return: whatever ScriptEval.EvaluateScript returns for this request.
        """
        request_metadata = dict(context.invocation_metadata())
        script_header = SSE.ScriptRequestHeader()
        script_header.ParseFromString(request_metadata['qlik-scriptrequestheader-bin'])

        return self.ScriptEval.EvaluateScript(script_header, request, context)

    # ------------------------------------------------------------------
    # Implementation of the Server connecting to gRPC.
    # ------------------------------------------------------------------

    def Serve(self, port, pem_dir):
        """
        Start the gRPC server on the given port and block until interrupted.

        With a pem_dir the port is opened in secure mode using the key, certificate
        and root certificate found in that directory; without it the port is opened
        in insecure mode.
        :param port: port to listen on.
        :param pem_dir: directory including certificates, or a falsy value for an
        insecure connection.
        :return: None
        """
        address = '[::]:{}'.format(port)

        # Create gRPC server and register this servicer with it
        grpc_server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        SSE.add_ConnectorServicer_to_server(self, grpc_server)

        if not pem_dir:
            # Insecure connection
            grpc_server.add_insecure_port(address)
            logging.info('*** Running server in insecure mode on port: {} ***'.format(port))
        else:
            # Secure connection: read key, certificate chain and root certificate in turn
            pem_contents = []
            for pem_name in ('sse_server_key.pem', 'sse_server_cert.pem', 'root_cert.pem'):
                with open(os.path.join(pem_dir, pem_name), 'rb') as pem_file:
                    pem_contents.append(pem_file.read())
            private_key, cert_chain, root_cert = pem_contents
            credentials = grpc.ssl_server_credentials([(private_key, cert_chain)], root_cert, True)
            grpc_server.add_secure_port(address, credentials)
            logging.info('*** Running server in secure mode on port: {} ***'.format(port))

        # Start gRPC server; start() does not block, so sleep until Ctrl+C arrives
        grpc_server.start()
        try:
            while True:
                time.sleep(_ONE_DAY_IN_SECONDS)
        except KeyboardInterrupt:
            grpc_server.stop(0)
In this example, we use the `exec` method to evaluate the script rather than the `eval` method, as we did in the original example plugin. This change makes it possible to pass a multiline script from Qlik. + +## Content +* [Implementation](#implementation) + * [Parameters sent from Qlik](#parameters-sent-from-qlik) + * [TableDescription](#tabledescription) + * [Result](#result) +* [Qlik Documents](#qlik-documents) +* [Run the Example!](#run-the-example) + +## Implementation +We have tried to provide well documented code that you can easily follow along with. If something is unclear, please let us know so that we can update and improve our documentation. In this file, we guide you through a few key points in the implementation that are worth clarifying. + +### Parameters sent from Qlik +The parameters sent from Qlik are now stored in a `pandas.DataFrame` object called `q`. The names of the parameters, and hence the column names of `q`, are set to the names sent from Qlik in the _ScriptRequestHeader_. For instance, if you send a parameter called `Foo` from Qlik, you can reach the parameter by writing `q.Foo` or `q["Foo"]` in the script. + +If the parameter is of type _Dual_ the plugin will create two additional columns in the `q` data frame, holding the string and the numerical representation. The names of these columns consist of the parameter name followed by `_str` and `_num` respectively. For example, a parameter called `Bar` with datatype _Dual_ will result in three columns in `q`: `Bar`, `Bar_str` and `Bar_num`. `Bar` will contain `(numeric, string)` tuples, `Bar_str` will contain only strings and `Bar_num` only numerics. + +### TableDescription +In the load script, when using the `Load ... Extension ...` syntax you can create the `TableDescription` message within the script. This can be useful if, for example, you want to name, set tags for, or change the datatype of the fields you are sending back to Qlik. 
Read more about what metadata can be included in the `TableDescription` in the [SSE_Protocol.md](../../../docs/SSE_Protocol.md#qlik.sse.TableDescription). + +An instance of the `TableDescription` message is available from the script by the name `table`. To that instance you can add metadata according to the protocol. A few simple examples: + +- `table.name = "Table1"` sets the table name to be _Table1_ +- `table.fields.add(name="firstField", dataType=1, tags=["tag1", "tag2"])` adds a _numeric_ field called _firstField_ with the tags _tag1_ and _tag2_. + +Note that the `TableDescription` is only sent to Qlik if the script also sets `tableDescription = True`, and that, when it is sent, the number of fields in the message must match the number of fields of data sent back to Qlik. + +### Result +With the change to using the `exec` method to evaluate the script, there are some changes regarding what's possible to write in the script. See the Python documentation of `exec` [here](https://docs.python.org/3/library/functions.html#exec). One change is that the `exec` method does not return anything. We must therefore set the result to a specific variable, which we have chosen to call `qResult`. If the script does not assign anything to `qResult`, the plugin reports an error and no data is returned to Qlik. Note that `qResult` is not required to be a Pandas data frame. + +For example, if you want to return the same parameters as received from Qlik you can use the script `'qResult = q.values'`. Note that if you instead write `'qResult = q'`, the entire data frame, including the column names as the first row, is passed along to where the duals and BundledRows are created. This could result in an error if the column names are strings and you are supposed to return numerics. + + +## Qlik documents +We provide an example Qlik Sense document (SSE_Full_Script_Support_pandas.qvf). It's the same as the original Full Script Support example, but with modified scripts to work with the Pandas implementation and the use of `exec`. + +In the load script there is an example of the `Load ... 
from enum import Enum


class ArgType(Enum):
    """
    Represents data types that can be used
    as arguments in different script functions.
    """
    # Combined type of all parameters in a script request header, as
    # classified by ScriptEval.get_arg_types.
    Undefined = -1  # the parameter types matched none of the cases below
    Empty = 0  # the header carries no parameters
    String = 1  # every parameter is of string type
    Numeric = 2  # every parameter is of numeric type
    Mixed = 3  # the parameters differ in type, or all of them are duals


class ReturnType(Enum):
    """
    Represents return types that can
    be used in script evaluation.
    """
    # Produced by ScriptEval.get_return_type from the header's returnType.
    Undefined = -1  # the header's return type is none of string, numeric or dual
    String = 0
    Numeric = 1
    Dual = 2


class FunctionType(Enum):
    """
    Represents function types.
    """
    # Produced by ScriptEval.get_func_type from the header's functionType.
    Scalar = 0
    Aggregation = 1
    Tensor = 2
class ScriptEval:
    """
    Class for SSE plugin ScriptEval functionality.

    Collects the rows sent from Qlik into a pandas data frame, executes the
    script found in the request header against it and converts the value the
    script stored in ``qResult`` into BundledRows.
    """

    def EvaluateScript(self, header, request, context):
        """
        Evaluates script provided in the header, given the
        arguments provided in the sequence of RowData objects, the request.

        All rows of the request are gathered into one data frame ``q`` and the
        script is evaluated once, so a single BundledRows message is yielded.

        :param header: the script request header (script, parameters, types).
        :param request: an iterable sequence of bundled rows.
        :param context: the context sent from client
        :return: a generator yielding the BundledRows built by evaluate().
        """
        # Retrieve function type (only used for logging)
        func_type = self.get_func_type(header)

        # Retrieve data types from header
        arg_types = self.get_arg_types(header)
        ret_type = self.get_return_type(header)

        logging.info('EvaluateScript: {} ({} {}) {}'
                     .format(header.script, arg_types, ret_type, func_type))

        if header.params:
            # Collect every row first and build the frame once; appending to a
            # data frame row by row is quadratic and DataFrame.append no longer
            # exists in pandas 2.x.
            rows = []
            for request_rows in request:
                for row in request_rows.rows:
                    params, _ = self.get_arguments(context, arg_types, row.duals, header)
                    rows.append(params.tolist())

            # Column names come from the header alone, so they are well defined
            # even when the request carried no rows. Any dual parameter makes
            # get_arg_types return Mixed, which is exactly the case in which
            # get_arguments adds the extra _str/_num values.
            column_names = self._column_names(
                [param.name for param in header.params],
                [param.dataType == SSE.DUAL for param in header.params])
            q = pandas.DataFrame(rows, columns=column_names)
        else:
            # No parameters provided, the script gets an empty data frame
            q = pandas.DataFrame()

        yield self.evaluate(context, header.script, ret_type, q)

    @staticmethod
    def _column_names(param_names, dual_flags):
        """
        Builds the column names of the data frame handed to the script.

        Each parameter contributes its own name; a dual parameter is followed
        by two extra names, '<name>_str' and '<name>_num', matching the order
        in which get_arguments emits the extra values.

        :param param_names: parameter names in header order.
        :param dual_flags: for each parameter, True if it is of dual type.
        :return: list of column names.
        """
        names = []
        for name, is_dual in zip(param_names, dual_flags):
            names.append(name)
            if is_dual:
                names.extend((name + '_str', name + '_num'))
        return names

    @staticmethod
    def get_func_type(header):
        """
        Retrieves the function type.
        :param header: the script request header.
        :return: the matching FunctionType, or None if the header's function
        type is none of scalar, aggregation or tensor.
        """
        func_type = header.functionType
        if func_type == SSE.SCALAR:
            return FunctionType.Scalar
        elif func_type == SSE.AGGREGATION:
            return FunctionType.Aggregation
        elif func_type == SSE.TENSOR:
            return FunctionType.Tensor

    @staticmethod
    def raise_grpc_error(context, status_code, msg):
        """
        Reports an error to the client and raises it on the plugin side.
        :param context: the request context on which code and details are set.
        :param status_code: the gRPC status code to report.
        :param msg: the error details.
        :raises grpc.RpcError: always.
        """
        # Make sure the error handling, including logging, works as intended in the client
        context.set_code(status_code)
        context.set_details(msg)
        # Raise error on the plugin-side
        raise grpc.RpcError(status_code, msg)

    def get_arguments(self, context, arg_types, duals, header):
        """
        Gets the array of arguments based on
        the duals, and the type (string, numeric)
        specified in the header.
        :param context: the context sent from client
        :param arg_types: the argument data type
        :param duals: an iterable sequence of duals.
        :param header: the script header.
        :return: a tuple of a panda Series containing (potentially mixed data type)
        arguments and a flag telling whether any dual parameter was expanded.
        :raises grpc.RpcError: if arg_types is not String, Numeric or Mixed.
        """
        dual_type = False
        if arg_types == ArgType.String:
            # All parameters are of string type
            script_args = [d.strData for d in duals]
        elif arg_types == ArgType.Numeric:
            # All parameters are of numeric type
            script_args = [d.numData for d in duals]
        elif arg_types == ArgType.Mixed:
            # Parameters can be either string, numeric or dual
            script_args = []
            for dual, param in zip(duals, header.params):
                if param.dataType == SSE.STRING:
                    script_args.append(dual.strData)
                elif param.dataType == SSE.NUMERIC:
                    script_args.append(dual.numData)
                elif param.dataType == SSE.DUAL:
                    script_args.append((dual.numData, dual.strData))
                    # We add additional columns with string and numeric representation
                    # for easier access in script
                    script_args.append(dual.strData)
                    script_args.append(dual.numData)
                    dual_type = True
        else:
            # Undefined argument types; raise_grpc_error always raises
            msg = 'Undefined argument type: {}'.format(arg_types)
            self.raise_grpc_error(context, grpc.StatusCode.INVALID_ARGUMENT, msg)

        return pandas.Series(script_args), dual_type

    @staticmethod
    def get_arg_types(header):
        """
        Determines the argument types for all parameters.
        :param header: the script request header.
        :return: ArgType
        """
        data_types = [param.dataType for param in header.params]

        if not data_types:
            return ArgType.Empty
        elif len(set(data_types)) > 1 or all(data_type == SSE.DUAL for data_type in data_types):
            # Differing types, or only duals, are both handled per parameter
            return ArgType.Mixed
        elif all(data_type == SSE.STRING for data_type in data_types):
            return ArgType.String
        elif all(data_type == SSE.NUMERIC for data_type in data_types):
            return ArgType.Numeric
        else:
            return ArgType.Undefined

    @staticmethod
    def get_return_type(header):
        """
        Determines the return type requested in the header.
        :param header: the script request header.
        :return: Return type
        """
        if header.returnType == SSE.STRING:
            return ReturnType.String
        elif header.returnType == SSE.NUMERIC:
            return ReturnType.Numeric
        elif header.returnType == SSE.DUAL:
            return ReturnType.Dual
        else:
            return ReturnType.Undefined

    @staticmethod
    def get_duals(result, ret_type):
        """
        Transforms one result row to an iterable of Dual data.
        :param result: a single value or an iterable of values.
        :param ret_type: the return type, deciding which Dual field is filled.
        :return: an iterator of Duals for String and Numeric return types.
        """
        # A string is iterable but must be treated as one value
        if isinstance(result, str) or not hasattr(result, '__iter__'):
            result = [result]
        # Transform the result to an iterable of Dual data
        if ret_type == ReturnType.String:
            return iter([SSE.Dual(strData=col) for col in result])
        elif ret_type == ReturnType.Numeric:
            return iter([SSE.Dual(numData=col) for col in result])
        # NOTE(review): for ReturnType.Dual and ReturnType.Undefined this falls
        # through and returns None; the intended encoding of a dual result is
        # not defined here - confirm before relying on dual return values.

    @staticmethod
    def send_table_description(table, context):
        """
        Sends the table description to Qlik as initial metadata.

        TableDescription is only handled in Qlik if sent from a 'Load ... Extension ...' script.
        If tableDescription is set when evaluating an expression the header will be ignored
        when received by Qlik.
        :param table: the table description specified in the script
        :param context: the request context
        :return: nothing
        """
        logging.debug('tableDescription sent to Qlik: {}'.format(table))
        # send table description
        table_header = (('qlik-tabledescription-bin', table.SerializeToString()),)
        context.send_initial_metadata(table_header)

    def evaluate(self, context, script, ret_type, q):
        """
        Evaluates a script with given parameters and construct the result to a Row of duals.

        The script sees the names ``q``, ``numpy``, ``pandas`` and ``table`` and must
        assign its result to ``qResult``. The table description is sent only if the
        script also sets ``tableDescription = True``.
        :param context: the request context
        :param script: script to evaluate
        :param ret_type: return data type
        :param q: data frame of received parameters, empty if no parameter was sent
        :return: the BundledRows built from qResult
        :raises grpc.RpcError: if the script did not assign qResult.
        """
        table = SSE.TableDescription()
        logging.debug('Received data frame (q): {}'.format(q))
        locals_added = {}  # The variables set while executing the script will be saved to this dict
        # NOTE(security): exec runs the script exactly as received from the client,
        # with the privileges of this process. Only expose the plugin to trusted clients.
        exec(script, {'q': q, 'numpy': numpy, 'pandas': pandas, 'table': table}, locals_added)

        if 'qResult' not in locals_added:
            # No result was saved to qResult object; raise_grpc_error always raises
            msg = 'No result was saved to qResult, check your script.'
            self.raise_grpc_error(context, grpc.StatusCode.INVALID_ARGUMENT, msg)

        qResult = locals_added['qResult']
        logging.debug('Result (qResult): {}'.format(qResult))

        if 'tableDescription' in locals_added and locals_added['tableDescription'] is True:
            self.send_table_description(table, context)

        # Transform the result to bundled rows
        bundledRows = SSE.BundledRows()
        if isinstance(qResult, str) or not hasattr(qResult, '__iter__'):
            # A single value is returned
            bundledRows.rows.add(duals=self.get_duals(qResult, ret_type))
        else:
            for row in qResult:
                bundledRows.rows.add(duals=self.get_duals(row, ret_type))

        return bundledRows
The Python code to be executed is written in the expression field directly, as a parameter to one of the script functions.| +| __Full script support using Pandas__ | Script | Scalar, Aggregation, Tensor | Numeric, String, Dual | Enabled (default) | Using the Pandas library and exec method to evaluate the script instead of eval. Otherwise the same example as the original full script support | For details about a particular example, see its documentation: - [Hello world](HelloWorld/README.md) - [Column operations](ColumnOperations/README.md) - [Full script support](FullScriptSupport/README.md) +- [Full script support using Pandas](FullScriptSupport_Pandas/README.md) ## Running the Python examples Follow these steps to quickly set up and run an example of your choice on your local machine, with an insecure connection, using either Qlik Sense Desktop, Qlik Sense Enterprise, QlikView Desktop or QlikView Server. To run several examples, or to run the examples with a secure connection or on another node, configure your system according to the instructions referenced in [Configuring SSE plugins in Qlik](../../docs/configuration.md). The ``, `` and `` referred to below are mapped to each example as follows: @@ -25,6 +27,7 @@ Follow these steps to quickly set up and run an example of your choice on your l | __Hello world__ | helloworld | HelloWorld | 50052 | | __Column operations__ | column | Column | 50053 | | __Full script support__ | script | Script | 50051 | +| __Full script support using Pandas__ | scriptPandas | ScriptPandas | 50056 | ### Qlik Sense Desktop 1. Install Qlik Sense Desktop (June 2017 release or later). @@ -78,5 +81,5 @@ Follow these steps to quickly set up and run an example of your choice on your l 6. Start QlikView Server and open the app for the example you chose. 
## Configuring all Python examples at once (except Qlik Sense Enterprise) -In order to use all three Python examples in parallel, all you have to do is to map a different name to each port on the same line in *Settings.ini*: -`SSEPlugin=Script,localhost:50051;HelloWorld,localhost:50052;Column,localhost:50053;` \ No newline at end of file +In order to use all four Python examples in parallel, all you have to do is to map a different name to each port on the same line in *Settings.ini*: +`SSEPlugin=Script,localhost:50051;HelloWorld,localhost:50052;Column,localhost:50053;ScriptPandas,localhost:50056` diff --git a/examples/python/prerequisites.md b/examples/python/prerequisites.md index 19486ba..2cdd60c 100644 --- a/examples/python/prerequisites.md +++ b/examples/python/prerequisites.md @@ -1,9 +1,10 @@ # Prerequisites for running the Python examples -If you are able to run the Python examples for gRPC ([http://www.grpc.io/docs/](http://www.grpc.io/docs/)), you have all you need to get started. +To run the Python SSE plugin examples, you need __Python__ version 3.4 or higher. You can find links for installing Anaconda (a Python distribution including common libraries and packages) on the [Anaconda webpage](https://www.continuum.io/downloads). For Python without any libraries, see the [Python webpage](https://www.python.org/downloads/). -Specifically, to run the Python SSE plugin examples, you need the following: +The following _python libraries_ are needed for the specified SSE plugins. -* __pip__, version 8 (or higher). pip is already installed if you're using python 2.0 >= 2.7 or 3 >= 3.4 binaries from python.org, but you will have to upgrade pip. You can update your version of pip by running `$ python -m pip install --upgrade pip`. You can find more information at [https://pip.pypa.io/en/stable/installing/](https://pip.pypa.io/en/stable/installing/). pip is also included in the Anaconda distribution, below. -* __Python__, version 3.4 (or higher). 
You can find links for installing Anaconda (a Python distribution including common libraries and packages) on the Anaconda webpage: [https://www.continuum.io/downloads](https://www.continuum.io/downloads). For Python without any libraries, see [https://www.python.org/downloads/](https://www.python.org/downloads/). -* __grpcio__: Install the package using pip: `$ python -m pip install grpcio` -* __numpy__: (_FullScriptSupport_ only) Included in most python distributions, including Anaconda. If needed, install using pip: `$ python -m pip install numpy`. +| __Name__ | __SSE plugin(s)__ | __Comments__ | +| ----- | ----- | ----- | +| __grpcio__ |all examples | Install the package using pip: `$ python -m pip install grpcio` | +| __numpy__ |_FullScriptSupport_ and _FullScriptSupport_Pandas_ | Included in most python distributions, including Anaconda. If needed, install using pip: `$ python -m pip install numpy`. | +| __pandas__ |_FullScriptSupport_Pandas_ | Included in most python distributions, including Anaconda. If needed, install using pip: `$ python -m pip install pandas`. |