Source code for airflow.operators.python_operator

# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from builtins import str
from datetime import datetime
import logging

from airflow.models import BaseOperator, TaskInstance
from airflow.utils.state import State
from airflow.utils.decorators import apply_defaults
from airflow import settings


class PythonOperator(BaseOperator):
    """
    Executes a Python callable

    :param python_callable: A reference to an object that is callable
    :type python_callable: python callable
    :param op_kwargs: a dictionary of keyword arguments that will get unpacked
        in your function
    :type op_kwargs: dict
    :param op_args: a list of positional arguments that will get unpacked when
        calling your callable
    :type op_args: list
    :param provide_context: if set to true, Airflow will pass a set of
        keyword arguments that can be used in your function. This set of
        kwargs correspond exactly to what you can use in your jinja
        templates. For this to work, you need to define `**kwargs` in your
        function header.
    :type provide_context: bool
    :param templates_dict: a dictionary where the values are templates that
        will get templated by the Airflow engine sometime between
        ``__init__`` and ``execute`` takes place and are made available
        in your callable's context after the template has been applied
    :type templates_dict: dict of str
    :param templates_exts: a list of file extensions to resolve while
        processing templated fields, for example ``['.sql', '.hql']``
    :type templates_exts: list
    """
    template_fields = ('templates_dict',)
    template_ext = tuple()
    ui_color = '#ffefeb'

    @apply_defaults
    def __init__(
            self,
            python_callable,
            op_args=None,
            op_kwargs=None,
            provide_context=False,
            templates_dict=None,
            templates_exts=None,
            *args, **kwargs):
        super(PythonOperator, self).__init__(*args, **kwargs)
        self.python_callable = python_callable
        # Fall back to fresh containers so instances never share state.
        self.op_args = op_args or []
        self.op_kwargs = op_kwargs or {}
        self.provide_context = provide_context
        self.templates_dict = templates_dict
        if templates_exts:
            # Shadow the class-level default only when extensions were given.
            self.template_ext = templates_exts

    def execute(self, context):
        """
        Call ``python_callable`` and return whatever it returns.

        When ``provide_context`` is set, ``op_kwargs`` and ``templates_dict``
        are merged into ``context`` (in place) and the merged dictionary is
        passed to the callable as keyword arguments; otherwise only
        ``op_kwargs`` is passed.

        :param context: the dictionary handed to ``execute`` by the caller
        :type context: dict
        :return: the value returned by ``python_callable``
        """
        call_kwargs = self.op_kwargs
        if self.provide_context:
            context.update(self.op_kwargs)
            context['templates_dict'] = self.templates_dict
            # Use a local instead of rebinding ``self.op_kwargs``: otherwise
            # a later ``execute`` on the same instance would merge this run's
            # stale context over the fresh one.
            call_kwargs = context

        return_value = self.python_callable(*self.op_args, **call_kwargs)
        logging.info("Done. Returned value was: " + str(return_value))
        return return_value
class BranchPythonOperator(PythonOperator):
    """
    Allows a workflow to "branch" or follow a single path following the
    execution of this task.

    It derives the PythonOperator and expects a Python function that returns
    the task_id to follow. The task_id returned should point to a task
    directly downstream from this operator. All other "branches" or
    directly downstream tasks are marked with a state of ``skipped`` so that
    these paths can't move forward. The ``skipped`` states are propagated
    downstream to allow for the DAG state to fill up and the DAG run's state
    to be inferred.

    Note that using tasks with ``depends_on_past=True`` downstream from
    ``BranchPythonOperator`` is logically unsound as ``skipped`` status
    will invariably lead to block tasks that depend on their past successes.
    ``skipped`` states propagates where all directly upstream tasks are
    ``skipped``.
    """
    def execute(self, context):
        """
        Run the callable and mark every directly downstream task whose
        ``task_id`` differs from the returned value as ``skipped``.

        :param context: the dictionary handed to ``execute`` by the caller;
            its ``'task'`` and ``'ti'`` entries are read here
        :type context: dict
        """
        branch = super(BranchPythonOperator, self).execute(context)
        # Lazy %-formatting: logging must not raise TypeError if the callable
        # returned something that is not a string (e.g. ``None``).
        logging.info("Following branch %s", branch)
        logging.info("Marking other directly downstream tasks as skipped")
        session = settings.Session()
        try:
            for task in context['task'].downstream_list:
                if task.task_id != branch:
                    ti = TaskInstance(
                        task, execution_date=context['ti'].execution_date)
                    ti.state = State.SKIPPED
                    ti.start_date = datetime.now()
                    ti.end_date = datetime.now()
                    session.merge(ti)
            session.commit()
        finally:
            # Always release the session, even if merge/commit fails.
            session.close()
        logging.info("Done.")
class ShortCircuitOperator(PythonOperator):
    """
    Allows a workflow to continue only if a condition is met. Otherwise, the
    workflow "short-circuits" and downstream tasks are skipped.

    The ShortCircuitOperator is derived from the PythonOperator. It evaluates
    a condition and short-circuits the workflow if the condition is False.
    Any downstream tasks are marked with a state of "skipped". If the
    condition is True, downstream tasks proceed as normal.

    The condition is determined by the result of `python_callable`.
    """
    def execute(self, context):
        """
        Run the callable and, when its result is falsy, mark every directly
        downstream task as ``skipped``.

        :param context: the dictionary handed to ``execute`` by the caller;
            its ``'task'`` and ``'ti'`` entries are read on the skip path
        :type context: dict
        """
        condition = super(ShortCircuitOperator, self).execute(context)
        logging.info("Condition result is {}".format(condition))

        if condition:
            logging.info('Proceeding with downstream tasks...')
            return

        logging.info('Skipping downstream tasks...')
        session = settings.Session()
        try:
            for task in context['task'].downstream_list:
                ti = TaskInstance(
                    task, execution_date=context['ti'].execution_date)
                ti.state = State.SKIPPED
                ti.start_date = datetime.now()
                ti.end_date = datetime.now()
                session.merge(ti)
            session.commit()
        finally:
            # Always release the session, even if merge/commit fails.
            session.close()
        logging.info("Done.")