[AIRFLOW-3933] Fix various typos #4747

Merged (2 commits) on Feb 21, 2019
4 changes: 2 additions & 2 deletions UPDATING.md
@@ -38,7 +38,7 @@ Sensors are now accessible via `airflow.sensors` and no longer via `airflow.oper
For example: `from airflow.operators.sensors import BaseSensorOperator`
becomes `from airflow.sensors.base_sensor_operator import BaseSensorOperator`

### Renamed "extra" requirments for cloud providers
### Renamed "extra" requirements for cloud providers

Subpackages for specific services have been combined into one variant for
each cloud provider.
@@ -191,7 +191,7 @@ that he has permissions on. If a new role wants to access all the dags, the admi
We also provide a new cli command(``sync_perm``) to allow admin to auto sync permissions.

### Modification to `ts_nodash` macro
`ts_nodash` previously contained TimeZone information alongwith execution date. For Example: `20150101T000000+0000`. This is not user-friendly for file or folder names which was a popular use case for `ts_nodash`. Hence this behavior has been changed and using `ts_nodash` will no longer contain TimeZone information, restoring the pre-1.10 behavior of this macro. And a new macro `ts_nodash_with_tz` has been added which can be used to get a string with execution date and timezone info without dashes.
`ts_nodash` previously contained TimeZone information along with execution date. For Example: `20150101T000000+0000`. This is not user-friendly for file or folder names which was a popular use case for `ts_nodash`. Hence this behavior has been changed and using `ts_nodash` will no longer contain TimeZone information, restoring the pre-1.10 behavior of this macro. And a new macro `ts_nodash_with_tz` has been added which can be used to get a string with execution date and timezone info without dashes.

Examples:
* `ts_nodash`: `20150101T000000`
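A minimal usage sketch of the two macros in a templated field (illustrative only, not part of this change; the DAG id, dates, and command are placeholder assumptions):

```python
# Sketch: using ts_nodash / ts_nodash_with_tz in a templated field.
# After this change, {{ ts_nodash }} renders like 20150101T000000 (no timezone),
# while {{ ts_nodash_with_tz }} renders like 20150101T000000+0000.
from datetime import datetime

from airflow import DAG
from airflow.operators.bash_operator import BashOperator

dag = DAG('ts_nodash_example', start_date=datetime(2015, 1, 1), schedule_interval='@daily')

write_file = BashOperator(
    task_id='write_file',
    # Timezone-free timestamp is safe to embed in file names.
    bash_command='echo "run" > /tmp/output_{{ ts_nodash }}.txt',
    dag=dag,
)
```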
4 changes: 2 additions & 2 deletions airflow/contrib/example_dags/example_qubole_operator.py
@@ -65,7 +65,7 @@ def compare_result(ds, **kwargs):
fetch_logs=True,
# If `fetch_logs`=true, will fetch qubole command logs and concatenate
# them into corresponding airflow task logs
tags='aiflow_example_run',
tags='airflow_example_run',
# To attach tags to qubole command, auto attach 3 tags - dag_id, task_id, run_id
qubole_conn_id='qubole_default',
# Connection id to submit commands inside QDS, if not set "qubole_default" is used
@@ -220,7 +220,7 @@ def main(args: Array[String]) {
program=prog,
language='scala',
arguments='--class SparkPi',
tags='aiflow_example_run',
tags='airflow_example_run',
dag=dag)

t11.set_upstream(branching)
2 changes: 1 addition & 1 deletion airflow/contrib/hooks/azure_data_lake_hook.py
@@ -77,7 +77,7 @@ def upload_file(self, local_path, remote_path, nthreads=64, overwrite=True,
are not supported.
:type local_path: str
:param remote_path: Remote path to upload to; if multiple files, this is the
dircetory root to write within.
directory root to write within.
:type remote_path: str
:param nthreads: Number of threads to use. If None, uses the number of cores.
:type nthreads: int
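A hedged usage sketch of the `upload_file` call documented above; the connection id, paths, and thread count are illustrative assumptions, not values from this PR:

```python
# Sketch: uploading a local file into Azure Data Lake via the contrib hook.
from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook

hook = AzureDataLakeHook(azure_data_lake_conn_id='azure_data_lake_default')  # assumed connection id
hook.upload_file(
    local_path='/tmp/report.csv',    # single local file (globs are also accepted per the docstring)
    remote_path='landing/reports',   # directory root to write within when uploading multiple files
    nthreads=8,                      # number of threads; None means one per core
    overwrite=True,
)
```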
2 changes: 1 addition & 1 deletion airflow/contrib/hooks/fs_hook.py
@@ -30,7 +30,7 @@ class FSHook(BaseHook):
example:
Conn Id: fs_test
Conn Type: File (path)
Host, Shchema, Login, Password, Port: empty
Host, Schema, Login, Password, Port: empty
Extra: {"path": "/tmp"}
"""

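For context on the connection shown in the FSHook docstring, a hedged sketch of a sensor consuming such a File (path) connection; the connection id matches the docstring example, while the DAG, task, and file names are assumptions:

```python
# Sketch: a FileSensor resolving its base path from the `fs_test` connection
# (Conn Type: File (path), Extra: {"path": "/tmp"}) described above.
from datetime import datetime

from airflow import DAG
from airflow.contrib.sensors.file_sensor import FileSensor

dag = DAG('fs_hook_example', start_date=datetime(2019, 1, 1), schedule_interval=None)

wait_for_file = FileSensor(
    task_id='wait_for_file',
    fs_conn_id='fs_test',        # connection whose Extra supplies the base path
    filepath='data_ready.csv',   # checked relative to that base path
    dag=dag,
)
```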
2 changes: 1 addition & 1 deletion airflow/contrib/hooks/qubole_hook.py
@@ -194,7 +194,7 @@ def get_jobs_id(self, ti):
"""
Get jobs associated with a Qubole commands
:param ti: Task Instance of the dag, used to determine the Quboles command id
:return: Job informations assoiciated with command
:return: Job information associated with command
"""
if self.cmd is None:
cmd_id = ti.xcom_pull(key="qbol_cmd_id", task_ids=self.task_id)
2 changes: 1 addition & 1 deletion airflow/contrib/hooks/salesforce_hook.py
@@ -276,7 +276,7 @@ def write_object_to_file(

schema = self.describe_object(object_name)

# possible columns that can be convereted to timestamps
# possible columns that can be converted to timestamps
# are the ones that are either date or datetime types
# strings are too general and we risk unintentional conversion
possible_timestamp_cols = [
2 changes: 1 addition & 1 deletion airflow/contrib/operators/awsbatch_operator.py
@@ -33,7 +33,7 @@ class AWSBatchOperator(BaseOperator):
"""
Execute a job on AWS Batch Service

.. warning: the queue parameter was renamed to job_queue to segreggate the
.. warning: the queue parameter was renamed to job_queue to segregate the
internal CeleryExecutor queue from the AWS Batch internal queue.

:param job_name: the name for the job that will run on AWS Batch (templated)
2 changes: 1 addition & 1 deletion airflow/contrib/operators/bigquery_check_operator.py
@@ -48,7 +48,7 @@ class BigQueryCheckOperator(CheckOperator):
This operator can be used as a data quality check in your pipeline, and
depending on where you put it in your DAG, you have the choice to
stop the critical path, preventing from
publishing dubious data, or on the side and receive email alterts
publishing dubious data, or on the side and receive email alerts
without stopping the progress of the DAG.

:param sql: the sql to be executed
4 changes: 2 additions & 2 deletions airflow/contrib/operators/cassandra_to_gcs.py
@@ -266,7 +266,7 @@ def convert_tuple_type(cls, name, value):
"""
Converts a tuple to RECORD that contains n fields, each will be converted
to its corresponding data type in bq and will be named 'field_<index>', where
index is determined by the order of the tuple elments defined in cassandra.
index is determined by the order of the tuple elements defined in cassandra.
"""
names = ['field_' + str(i) for i in range(len(value))]
values = [cls.convert_value(name, value) for name, value in zip(names, value)]
@@ -276,7 +276,7 @@ def convert_tuple_type(cls, name, value):
def convert_map_type(cls, name, value):
"""
Converts a map to a repeated RECORD that contains two fields: 'key' and 'value',
each will be converted to its corresopnding data type in BQ.
each will be converted to its corresponding data type in BQ.
"""
converted_map = []
for k, v in zip(value.keys(), value.values()):
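As a small illustration of the field-naming scheme described in these docstrings (a sketch only, not the operator's actual code path, which also converts each element's value to its BigQuery type):

```python
# Sketch: a Cassandra tuple becomes a RECORD whose fields are named
# 'field_<index>' in tuple order.
def tuple_to_record(value):
    names = ['field_' + str(i) for i in range(len(value))]
    return dict(zip(names, value))

print(tuple_to_record(('alice', 42)))  # {'field_0': 'alice', 'field_1': 42}
```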
2 changes: 1 addition & 1 deletion airflow/contrib/operators/dataflow_operator.py
@@ -92,7 +92,7 @@ class DataFlowJavaOperator(BaseOperator):
Cloud Platform for the dataflow job status while the job is in the
JOB_STATE_RUNNING state.
:type poll_sleep: int
:param job_class: The name of the dataflow job class to be executued, it
:param job_class: The name of the dataflow job class to be executed, it
is often not the main class configured in the dataflow jar file.
:type job_class: str

2 changes: 1 addition & 1 deletion airflow/contrib/operators/dataproc_operator.py
@@ -1376,7 +1376,7 @@ def execute(self, context):
self.hook.wait(self.start())

def start(self, context):
raise AirflowException('plese start a workflow operation')
raise AirflowException('please start a workflow operation')
Contributor comment: Not sure what the convention is, but maybe should be capitalised?

class DataprocWorkflowTemplateInstantiateOperator(DataprocWorkflowTemplateBaseOperator):
2 changes: 1 addition & 1 deletion airflow/contrib/operators/druid_operator.py
@@ -60,5 +60,5 @@ def execute(self, context):
druid_ingest_conn_id=self.conn_id,
max_ingestion_time=self.max_ingestion_time
)
self.log.info("Sumitting %s", self.index_spec_str)
self.log.info("Submitting %s", self.index_spec_str)
hook.submit_indexing_job(self.index_spec_str)
2 changes: 1 addition & 1 deletion airflow/operators/druid_check_operator.py
@@ -47,7 +47,7 @@ class DruidCheckOperator(CheckOperator):
This operator can be used as a data quality check in your pipeline, and
depending on where you put it in your DAG, you have the choice to
stop the critical path, preventing from
publishing dubious data, or on the side and receive email alterts
publishing dubious data, or on the side and receive email alerts
without stopping the progress of the DAG.

:param sql: the sql to be executed
2 changes: 1 addition & 1 deletion airflow/operators/presto_check_operator.py
@@ -48,7 +48,7 @@ class PrestoCheckOperator(CheckOperator):
This operator can be used as a data quality check in your pipeline, and
depending on where you put it in your DAG, you have the choice to
stop the critical path, preventing from
publishing dubious data, or on the side and receive email alterts
publishing dubious data, or on the side and receive email alerts
without stopping the progress of the DAG.

:param sql: the sql to be executed
2 changes: 1 addition & 1 deletion dev/airflow-pr
@@ -63,7 +63,7 @@ AIRFLOW_GIT_LOCATION = os.environ.get(
"AIRFLOW_GIT",
os.path.dirname(os.path.dirname(os.path.realpath(__file__))))

# Remote name which points to the Gihub site
# Remote name which points to the Github site
Contributor comment: I think it should be GitHub

GITHUB_REMOTE_NAME = os.environ.get("GITHUB_REMOTE_NAME", "github")
# OAuth key used for issuing requests against the GitHub API. If this is not
# defined, then requests will be unauthenticated. You should only need to
2 changes: 1 addition & 1 deletion docs/howto/operator.rst
@@ -534,7 +534,7 @@ it will be retrieved from the GCP connection used. Both variants are shown:
Advanced
""""""""

When creating a table, you can specify the optional ``initial_split_keys`` and ``column_familes``.
When creating a table, you can specify the optional ``initial_split_keys`` and ``column_families``.
Please refer to the Python Client for Google Cloud Bigtable documentation
`for Table <https://googleapis.github.io/google-cloud-python/latest/bigtable/table.html>`_ and `for Column
Families <https://googleapis.github.io/google-cloud-python/latest/bigtable/column-family.html>`_.
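A hedged sketch of passing the two optional arguments mentioned above to the contrib Bigtable table-create operator of that era; the operator import path, ids, split keys, and GC rule are assumptions to verify against the linked client documentation:

```python
# Sketch: creating a Bigtable table with optional initial_split_keys and
# column_families. Instance/table ids are placeholders.
from airflow.contrib.operators.gcp_bigtable_operator import BigtableTableCreateOperator
from google.cloud.bigtable.column_family import MaxVersionsGCRule

create_table = BigtableTableCreateOperator(
    task_id='create_table',
    instance_id='my-bigtable-instance',            # assumed instance id
    table_id='events',                             # assumed table id
    initial_split_keys=[b'user_1000', b'user_2000'],
    column_families={'metrics': MaxVersionsGCRule(2)},  # keep at most 2 cell versions
)
```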