From 0941203334c8b29cbce76c3f4af0dfeb3e7dd11d Mon Sep 17 00:00:00 2001
From: Sumit Maheshwari
Date: Mon, 1 Oct 2018 11:58:27 +0530
Subject: [PATCH] [AIRFLOW-3062] Add Qubole in integration docs (#3946)

---
 airflow/contrib/sensors/qubole_sensor.py | 40 +++++++++++++++++++-----
 docs/integration.rst                     | 26 +++++++++++++++
 2 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/airflow/contrib/sensors/qubole_sensor.py b/airflow/contrib/sensors/qubole_sensor.py
index f79f58746b95a..c0291b1521471 100644
--- a/airflow/contrib/sensors/qubole_sensor.py
+++ b/airflow/contrib/sensors/qubole_sensor.py
@@ -29,14 +29,6 @@
 class QuboleSensor(BaseSensorOperator):
     """
     Base class for all Qubole Sensors
-
-    :param qubole_conn_id: The qubole connection to run the sensor against
-    :type qubole_conn_id: str
-    :param data: a JSON object containing payload, whose presence needs to be checked
-    :type data: a JSON object
-
-    .. note:: Both ``data`` and ``qubole_conn_id`` fields are template-supported. You can
-        also use ``.txt`` files for template driven use cases.
     """

     template_fields = ('data', 'qubole_conn_id')
@@ -75,6 +67,22 @@ def poke(self, context):


 class QuboleFileSensor(QuboleSensor):
+    """
+    Wait for a file or folder to be present in cloud storage
+    and check for its presence via QDS APIs
+
+    :param qubole_conn_id: Connection id which consists of qds auth_token
+    :type qubole_conn_id: str
+    :param data: a JSON object containing payload, whose presence needs to be checked.
+        Check this `example `_ for sample payload
+        structure.
+    :type data: a JSON object
+
+    .. note:: Both ``data`` and ``qubole_conn_id`` fields support templating. You can
+        also use ``.txt`` files for template-driven use cases.
+    """
+
     @apply_defaults
     def __init__(self, *args, **kwargs):
         self.sensor_class = FileSensor
@@ -82,6 +90,22 @@ def __init__(self, *args, **kwargs):


 class QubolePartitionSensor(QuboleSensor):
+    """
+    Wait for a Hive partition to show up in QHS (Qubole Hive Service)
+    and check for its presence via QDS APIs
+
+    :param qubole_conn_id: Connection id which consists of qds auth_token
+    :type qubole_conn_id: str
+    :param data: a JSON object containing payload, whose presence needs to be checked.
+        Check this `example `_ for sample payload
+        structure.
+    :type data: a JSON object
+
+    .. note:: Both ``data`` and ``qubole_conn_id`` fields support templating. You can
+        also use ``.txt`` files for template-driven use cases.
+    """
+
     @apply_defaults
     def __init__(self, *args, **kwargs):
         self.sensor_class = PartitionSensor
diff --git a/docs/integration.rst b/docs/integration.rst
index c4800d65ac298..f1ad6c5cb6a1a 100644
--- a/docs/integration.rst
+++ b/docs/integration.rst
@@ -6,6 +6,7 @@ Integration
 - :ref:`AWS`
 - :ref:`Databricks`
 - :ref:`GCP`
+- :ref:`Qubole`

 .. _ReverseProxy:

@@ -829,3 +830,28 @@ Google Kubernetes Engine Hook

 .. autoclass:: airflow.contrib.hooks.gcp_container_hook.GKEClusterHook
     :members:
+
+
+.. _Qubole:
+
+Qubole
+------
+
+Apache Airflow has a native operator and hooks to talk to `Qubole `__,
+which lets you submit your big data jobs directly to Qubole from Apache Airflow.
+
+QuboleOperator
+''''''''''''''
+
+.. autoclass:: airflow.contrib.operators.qubole_operator.QuboleOperator
+
+QubolePartitionSensor
+'''''''''''''''''''''
+
+.. autoclass:: airflow.contrib.sensors.qubole_sensor.QubolePartitionSensor
+
+
+QuboleFileSensor
+''''''''''''''''
+
+.. autoclass:: airflow.contrib.sensors.qubole_sensor.QuboleFileSensor
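
Side note for reviewers: a minimal usage sketch of the two sensors documented by
this patch follows. It is not part of the change itself; the DAG id, the
``qubole_default`` connection id, the schedule, and the ``data`` payload keys
(``files``, ``schema``, ``table``, ``columns``) are illustrative assumptions
modelled on the Qubole example DAGs, not values this patch defines.

    # Illustrative only -- DAG name, connection id, and payload keys are assumptions.
    from datetime import datetime, timedelta

    from airflow import DAG
    from airflow.contrib.sensors.qubole_sensor import (
        QuboleFileSensor,
        QubolePartitionSensor,
    )

    default_args = {
        'owner': 'airflow',
        'start_date': datetime(2018, 10, 1),
        'retries': 1,
        'retry_delay': timedelta(minutes=5),
    }

    dag = DAG('qubole_sensor_sketch', default_args=default_args, schedule_interval=None)

    # Poke QDS until the listed cloud-storage objects exist.
    check_s3_file = QuboleFileSensor(
        task_id='check_s3_file',
        qubole_conn_id='qubole_default',  # assumed connection holding the QDS auth_token
        data={'files': ['s3://my-bucket/incoming/events.csv']},  # assumed payload shape
        poke_interval=60,
        timeout=600,
        dag=dag,
    )

    # Poke QDS until the given Hive partition shows up.
    check_hive_partition = QubolePartitionSensor(
        task_id='check_hive_partition',
        qubole_conn_id='qubole_default',
        data={
            'schema': 'default',
            'table': 'my_partitioned_table',
            'columns': [{'column': 'month', 'values': ['10']}],
        },  # assumed payload shape
        poke_interval=60,
        timeout=600,
        dag=dag,
    )

    check_s3_file >> check_hive_partition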