From 961aecd1dc09ec9c24b35d7a4f3dbadd730a110e Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 28 Dec 2021 17:06:26 -0800 Subject: [PATCH 1/2] Update venv creation in release scripts. --- .../python_release_automation_utils.sh | 3 +- .../main/scripts/build_release_candidate.sh | 3 +- release/src/main/scripts/deploy_pypi.sh | 1 + .../scripts/deploy_release_candidate_pypi.sh | 2 +- release/src/main/scripts/run_rc_validation.sh | 44 +++++++++---------- .../content/en/contribute/release-guide.md | 27 +++--------- 6 files changed, 32 insertions(+), 48 deletions(-) diff --git a/release/src/main/python-release/python_release_automation_utils.sh b/release/src/main/python-release/python_release_automation_utils.sh index 507f50fcad5a..5d0e1c6eff76 100644 --- a/release/src/main/python-release/python_release_automation_utils.sh +++ b/release/src/main/python-release/python_release_automation_utils.sh @@ -161,7 +161,8 @@ function install_sdk() { gsutil version -l rm -rf ./temp_virtualenv_${2} $2 -m venv temp_virtualenv_${2} - . temp_virtualenv_${2}/bin/activate + . ./temp_virtualenv_${2}/bin/activate + pip install --upgrade pip setuptools wheel gcloud_version=$(gcloud --version | head -1 | awk '{print $4}') if [[ "$gcloud_version" < "189" ]]; then update_gcloud diff --git a/release/src/main/scripts/build_release_candidate.sh b/release/src/main/scripts/build_release_candidate.sh index 0c8e7ae2c2c9..7132ad2037eb 100755 --- a/release/src/main/scripts/build_release_candidate.sh +++ b/release/src/main/scripts/build_release_candidate.sh @@ -271,7 +271,7 @@ if [[ $confirmation = "y" ]]; then echo '-------------------Creating Python Virtualenv-----------------' python3 -m venv "${LOCAL_PYTHON_VIRTUALENV}" source "${LOCAL_PYTHON_VIRTUALENV}/bin/activate" - pip install -U pip + pip install --upgrade pip setuptools wheel pip install requests python-dateutil echo '--------------Fetching GitHub Actions Artifacts--------------' @@ -356,6 +356,7 @@ if [[ $confirmation = "y" ]]; then echo "------------------Building Python Doc------------------------" python3 -m venv "${LOCAL_PYTHON_VIRTUALENV}" source "${LOCAL_PYTHON_VIRTUALENV}/bin/activate" + pip install --upgrade pip setuptools wheel cd ${LOCAL_PYTHON_DOC} pip install -U pip pip install tox diff --git a/release/src/main/scripts/deploy_pypi.sh b/release/src/main/scripts/deploy_pypi.sh index d16a6fd5fc5c..38eef0c08a9a 100755 --- a/release/src/main/scripts/deploy_pypi.sh +++ b/release/src/main/scripts/deploy_pypi.sh @@ -43,6 +43,7 @@ cd ${LOCAL_CLONE_DIR} python3 -m venv deploy_pypi_env source ./deploy_pypi_env/bin/activate +pip install --upgrade pip setuptools wheel pip install twine wget -r --no-parent -A zip,whl "https://dist.apache.org/repos/dist/dev/beam/${RELEASE}/python" diff --git a/release/src/main/scripts/deploy_release_candidate_pypi.sh b/release/src/main/scripts/deploy_release_candidate_pypi.sh index defdede0a4c9..d1fef8c60783 100755 --- a/release/src/main/scripts/deploy_release_candidate_pypi.sh +++ b/release/src/main/scripts/deploy_release_candidate_pypi.sh @@ -129,7 +129,7 @@ echo "================Download python artifacts======================" PYTHON_ARTIFACTS_DIR="${LOCAL_CLONE_DIR_ROOT}/python" python3 -m venv deploy_pypi_env source ./deploy_pypi_env/bin/activate -pip install -U pip +pip install --upgrade pip setuptools wheel pip install requests python-dateutil python3 "${SCRIPT_DIR}/download_github_actions_artifacts.py" \ --github-user "${USER_GITHUB_ID}" \ diff --git a/release/src/main/scripts/run_rc_validation.sh b/release/src/main/scripts/run_rc_validation.sh index 28ba2ebeeb33..64914d80d38f 100755 --- a/release/src/main/scripts/run_rc_validation.sh +++ b/release/src/main/scripts/run_rc_validation.sh @@ -98,7 +98,9 @@ echo "All environment and workflow configurations from RC_VALIDATE_CONFIGS:" for i in "${RC_VALIDATE_CONFIGS[@]}"; do echo "$i = ${!i}" done -echo "[Confirmation Required] Are they all provided and correctly set? [y|N]" +echo "TODO(BEAM-13054): parts of this script launch background processes with gnome-terminal," +echo "It may not work well over ssh or within a tmux session. Using 'ssh -Y' may help." +echo "[Confirmation Required] Would you like to proceed with current settings? [y|N]" read confirmation if [[ $confirmation != "y" ]]; then echo "Please rerun this script and make sure you have the right configurations." @@ -174,12 +176,9 @@ if [[ -z `which gcloud` ]]; then fi gcloud --version -echo "-----------------Checking Bigquery CLI-----------------" -if [[ ! -f ~/.bigqueryrc ]]; then - echo "-----------------Initialing Bigquery CLI-----------------" - bq init -fi -bq version +echo "-----Initializing gcloud default and application-default credentials-----" +gcloud auth login +gcloud auth application-default login echo "-----------------Checking gnome-terminal-----------------" if [[ -z `which gnome-terminal` ]]; then @@ -227,7 +226,7 @@ if [[ "$python_quickstart_mobile_game" = true && ! -z `which hub` ]]; then echo "" echo "[NOTE] If there is no jenkins job started, please comment on $PR_URL with: Run Python ReleaseCandidate" else - echo "* Skip Python Quickstart and MobileGame. Hub is required." + echo "* Skipping Python Quickstart and MobileGame. Hub is required." fi # TODO(BEAM-13220) Run the remaining tests on Jenkins. @@ -250,9 +249,6 @@ if [[ ("$python_leaderboard_direct" = true \ echo "--------------------------Verifying Hashes------------------------------------" sha512sum -c apache-beam-${RELEASE_VER}.zip.sha512 - `which pip` install --upgrade pip - `which pip` install --upgrade setuptools - echo "--------------------------Updating ~/.m2/settings.xml-------------------------" cd ~ if [[ ! -d .m2 ]]; then @@ -317,8 +313,9 @@ if [[ ("$python_leaderboard_direct" = true \ do rm -rf ./beam_env_${py_version} echo "--------------Setting up virtualenv with $py_version interpreter----------------" - $py_version -m venv beam_env_${py_version} - . beam_env_${py_version}/bin/activate + $py_version -m venv beam_env_${py_version} + . ./beam_env_${py_version}/bin/activate + pip install --upgrade pip setuptools wheel echo "--------------------------Installing Python SDK-------------------------------" pip install apache-beam-${RELEASE_VER}.zip[gcp] @@ -355,7 +352,7 @@ if [[ ("$python_leaderboard_direct" = true \ bq head -n 10 ${LEADERBOARD_DIRECT_DATASET}.leader_board_teams echo "***************************************************************" else - echo "* Skip Python Leaderboard with DirectRunner" + echo "* Skipping Python Leaderboard with DirectRunner" fi echo "----------------Starting Leaderboard with DataflowRunner---------------------" @@ -393,7 +390,7 @@ if [[ ("$python_leaderboard_direct" = true \ bq head -n 10 ${LEADERBOARD_DF_DATASET}.leader_board_teams echo "***************************************************************" else - echo "* Skip Python Leaderboard with DataflowRunner" + echo "* Skipping Python Leaderboard with DataflowRunner" fi echo "------------------Starting GameStats with DirectRunner-----------------------" @@ -429,7 +426,7 @@ if [[ ("$python_leaderboard_direct" = true \ bq head -n 10 ${GAMESTATS_DIRECT_DATASET}.game_stats_sessions echo "***************************************************************" else - echo "* Skip Python GameStats with DirectRunner" + echo "* Skipping Python GameStats with DirectRunner" fi echo "-------------------Starting GameStats with DataflowRunner--------------------" @@ -468,11 +465,11 @@ if [[ ("$python_leaderboard_direct" = true \ bq head -n 10 ${GAMESTATS_DF_DATASET}.game_stats_sessions echo "***************************************************************" else - echo "* Skip Python GameStats with DataflowRunner" + echo "* Skipping Python GameStats with DataflowRunner" fi done # Loop over Python versions. else - echo "* Skip Python Leaderboard & GameStates Validations" + echo "* Skipping Python Leaderboard & GameStates Validations" fi echo "" @@ -507,7 +504,7 @@ if [[ ("$python_xlang_kafka_taxi_dataflow" = true echo "* Sleeping for 10 mins" sleep 10m else - echo "* Skip Kafka cluster setup" + echo "* Skipping Kafka cluster setup" fi echo "-----------------------Building expansion service jar------------------------" @@ -521,7 +518,8 @@ if [[ ("$python_xlang_kafka_taxi_dataflow" = true rm -rf ./beam_env_${py_version} echo "--------------Setting up virtualenv with $py_version interpreter----------------" $py_version -m venv beam_env_${py_version} - . beam_env_${py_version}/bin/activate + . ./beam_env_${py_version}/bin/activate + pip install --upgrade pip setuptools wheel ln -s ${LOCAL_BEAM_DIR}/sdks beam_env_${py_version}/lib/sdks echo "--------------------------Installing Python SDK-------------------------------" @@ -570,7 +568,7 @@ if [[ ("$python_xlang_kafka_taxi_dataflow" = true fi echo "***************************************************************" else - echo "* Skip Python XLang Kafka Taxi with DataflowRunner" + echo "* Skipping Python XLang Kafka Taxi with DataflowRunner" fi echo "----------------Starting XLang SQL Taxi with DataflowRunner---------------------" @@ -614,11 +612,11 @@ if [[ ("$python_xlang_kafka_taxi_dataflow" = true fi echo "***************************************************************" else - echo "* Skip Python XLang SQL Taxi with DataflowRunner" + echo "* Skipping Python XLang SQL Taxi with DataflowRunner" fi done # Loop over Python versions. else - echo "* Skip Python Cross-language Validations" + echo "* Skipping Python Cross-language Validations" fi echo "*************************************************************" echo " NOTE: Streaming pipelines are not automatically canceled. " diff --git a/website/www/site/content/en/contribute/release-guide.md b/website/www/site/content/en/contribute/release-guide.md index 727c94b49734..bd93b1e3f0ec 100644 --- a/website/www/site/content/en/contribute/release-guide.md +++ b/website/www/site/content/en/contribute/release-guide.md @@ -369,24 +369,8 @@ There are some projects that don't produce the artifacts, e.g. `beam-test-tools` To triage the failures and narrow things down you may want to look at `settings.gradle.kts` and run the build only for the projects you're interested at the moment, e.g. `./gradlew :runners:java-fn-execution`. #### (Alternative) Run release build manually (locally) -* **Pre-installation for python build** - 1. Install pip - - ``` - curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py - python get-pip.py - ``` - 1. Cython - - ``` - sudo pip install cython - sudo apt-get install gcc - sudo apt-get install python-dev - sudo apt-get install python3-dev - sudo apt-get install python3.5-dev - sudo apt-get install python3.6-dev - sudo apt-get install python3.7-dev - ``` +You will need to have Python interpreters for all supported Python minor +versions to run Python tests. See Python installation tips in [Developer Wiki](https://cwiki.apache.org/confluence/display/BEAM/Python+Tips#PythonTips-InstallingPythoninterpreters). * **Run gradle release build** @@ -934,10 +918,9 @@ _Note_: -Prepourl and -Pver can be found in the RC vote email sent by Release Ma * **Setup virtual environment** ``` - pip install --upgrade pip - pip install --upgrade setuptools - python -m venv beam_env - . beam_env/bin/activate + python3 -m venv beam_env + . ./beam_env/bin/activate + pip install --upgrade pip setuptools wheel ``` * **Install SDK** From 8289d42cede46980108ef07bc713bf154c9d97fd Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 28 Dec 2021 17:55:43 -0800 Subject: [PATCH 2/2] Drive-by updates to venv creation in other places. Retry pip installs in test scripts to avoid flakes. --- .../dependency_check/generate_report.sh | 7 ++--- .test-infra/jenkins/job_00_seed.groovy | 3 +- .../jenkins/job_seed_standalone.groovy | 5 +-- runners/portability/test_flink_uber_jar.sh | 1 + runners/portability/test_pipeline_jar.sh | 1 + .../license_scripts/license_script.sh | 3 +- .../apache_beam/examples/kafkataxi/README.md | 5 +-- .../apache_beam/runners/interactive/README.md | 31 ++++++++++--------- .../container/run_generate_requirements.sh | 3 +- .../documentation/sdks/python-dependencies.md | 2 +- 10 files changed, 33 insertions(+), 28 deletions(-) diff --git a/.test-infra/jenkins/dependency_check/generate_report.sh b/.test-infra/jenkins/dependency_check/generate_report.sh index 825287b00902..4a8c879ae92e 100755 --- a/.test-infra/jenkins/dependency_check/generate_report.sh +++ b/.test-infra/jenkins/dependency_check/generate_report.sh @@ -44,10 +44,9 @@ REPORT_DESCRIPTION=" # Virtualenv for the rest of the script to run setup $PYTHON -m venv dependency/check -. dependency/check/bin/activate -pip install --upgrade google-cloud-bigquery -pip install --upgrade google-cloud-bigtable -pip install --upgrade google-cloud-core +. ./dependency/check/bin/activate +pip install --upgrade pip setuptools wheel +pip install --upgrade google-cloud-bigquery google-cloud-bigtable google-cloud-core rm -f build/dependencyUpdates/beam-dependency-check-report.txt # Insall packages and run the unit tests of the report generator and the jira manager diff --git a/.test-infra/jenkins/job_00_seed.groovy b/.test-infra/jenkins/job_00_seed.groovy index e01bc347462e..820f3c4ba991 100644 --- a/.test-infra/jenkins/job_00_seed.groovy +++ b/.test-infra/jenkins/job_00_seed.groovy @@ -108,7 +108,8 @@ job('beam_SeedJob') { command(""" ( cd .test-infra/jenkins/committers_list_generator && python3.8 -m venv ve3 && source ve3/bin/activate && - pip install -r requirements.txt && + pip install --retries 10 --upgrade pip setuptools wheel && + pip install --retries 10 -r requirements.txt && python main.py -o .. && deactivate ) || { echo "ERROR: Failed to fetch committers"; exit 3; } diff --git a/.test-infra/jenkins/job_seed_standalone.groovy b/.test-infra/jenkins/job_seed_standalone.groovy index 33ac831069e7..6ae0cfb8f0d3 100644 --- a/.test-infra/jenkins/job_seed_standalone.groovy +++ b/.test-infra/jenkins/job_seed_standalone.groovy @@ -107,8 +107,9 @@ job('beam_SeedJob_Standalone') { shell { command(""" ( cd .test-infra/jenkins/committers_list_generator && - python3.8 -m venv ve3 && source ve3/bin/activate && - pip install -r requirements.txt && + python3.8 -m venv ve3 && source ./ve3/bin/activate && + pip install --retries 10 --upgrade pip setuptools wheel && + pip install --retries 10 -r requirements.txt && python main.py -o .. && deactivate ) || { echo "ERROR: Failed to fetch committers"; exit 3; } diff --git a/runners/portability/test_flink_uber_jar.sh b/runners/portability/test_flink_uber_jar.sh index 042a51458f6a..d073de4b91ec 100755 --- a/runners/portability/test_flink_uber_jar.sh +++ b/runners/portability/test_flink_uber_jar.sh @@ -82,6 +82,7 @@ docker images --format "{{.Repository}}:{{.Tag}}" | grep "$PYTHON_CONTAINER_IMAG # Set up Python environment python$PYTHON_VERSION -m venv "$ENV_DIR" . $ENV_DIR/bin/activate +pip install --retries 10 --upgrade pip setuptools wheel pip install --retries 10 -e "$PYTHON_ROOT_DIR" # Hacky python script to find a free port. Note there is a small chance the chosen port could diff --git a/runners/portability/test_pipeline_jar.sh b/runners/portability/test_pipeline_jar.sh index d2b84829d160..f01d67b6580c 100755 --- a/runners/portability/test_pipeline_jar.sh +++ b/runners/portability/test_pipeline_jar.sh @@ -74,6 +74,7 @@ docker images --format "{{.Repository}}:{{.Tag}}" | grep $PYTHON_CONTAINER_IMAGE # Set up Python environment python$PYTHON_VERSION -m venv $ENV_DIR . $ENV_DIR/bin/activate +pip install --retries 10 --upgrade pip setuptools wheel pip install --retries 10 -e $PYTHON_ROOT_DIR PIPELINE_PY=" diff --git a/sdks/java/container/license_scripts/license_script.sh b/sdks/java/container/license_scripts/license_script.sh index e73127684924..27560bfd8290 100755 --- a/sdks/java/container/license_scripts/license_script.sh +++ b/sdks/java/container/license_scripts/license_script.sh @@ -41,9 +41,10 @@ mkdir -p "$DOWNLOAD_DIR" cp -r "${EXISTING_LICENSE_DIR}"/*.jar "${DOWNLOAD_DIR}" $PYTHON -m venv ${ENV_DIR} && . ${ENV_DIR}/bin/activate +pip install --retries 10 --upgrade pip setuptools wheel # install packages -${ENV_DIR}/bin/pip install -r ${SCRIPT_DIR}/requirement.txt +pip install --retries 10 -r ${SCRIPT_DIR}/requirement.txt # pull licenses, notices and source code FLAGS="--license_index=${INDEX_FILE} \ diff --git a/sdks/python/apache_beam/examples/kafkataxi/README.md b/sdks/python/apache_beam/examples/kafkataxi/README.md index e8e144d88a8f..dc086e36c3d3 100644 --- a/sdks/python/apache_beam/examples/kafkataxi/README.md +++ b/sdks/python/apache_beam/examples/kafkataxi/README.md @@ -149,8 +149,9 @@ instructions regarding setting up other types of Python virtual environments. ```sh cd .. # Creating the virtual environment in the top level work directory. -python -m venv env -source env/bin/activate +python3 -m venv env +source ./env/bin/activate +pip install --upgrade pip setuptools wheel ``` Install Beam and dependencies and build a Beam distribution. diff --git a/sdks/python/apache_beam/runners/interactive/README.md b/sdks/python/apache_beam/runners/interactive/README.md index 4c28a9080e34..9d0da05047de 100644 --- a/sdks/python/apache_beam/runners/interactive/README.md +++ b/sdks/python/apache_beam/runners/interactive/README.md @@ -84,24 +84,13 @@ a quick reference). For a more general and complete getting started guide, see * Install [GraphViz](https://www.graphviz.org/download/) with your favorite system package manager. -- Install [JupyterLab](https://jupyter.org/install.html). You can use - either **conda** or **pip**. - - * conda - ```bash - conda install -c conda-forge jupyterlab - ``` - * pip - ```bash - pip install jupyterlab - ``` - -- Install, create and activate your [venv](https://docs.python.org/3/library/venv.html). +* Install, create and activate your [venv](https://docs.python.org/3/library/venv.html). (optional but recommended) ```bash python3 -m venv /path/to/beam_venv_dir source /path/to/beam_venv_dir/bin/activate + pip install --upgrade pip setuptools wheel ``` If you are using shells other than bash (e.g. fish, csh), check @@ -114,6 +103,18 @@ a quick reference). For a more general and complete getting started guide, see which python # This sould point to beam_venv_dir/bin/python ``` +* Install [JupyterLab](https://jupyter.org/install.html). You can use + either **conda** or **pip**. + + * conda + ```bash + conda install -c conda-forge jupyterlab + ``` + * pip + ```bash + pip install jupyterlab + ``` + * Set up Apache Beam Python. **Make sure the virtual environment is activated when you run `setup.py`** @@ -123,7 +124,7 @@ a quick reference). For a more general and complete getting started guide, see python setup.py install ``` -- Install an IPython kernel for the virtual environment you've just created. +* Install an IPython kernel for the virtual environment you've just created. **Make sure the virtual environment is activate when you do this.** You can skip this step if not using venv. @@ -139,7 +140,7 @@ a quick reference). For a more general and complete getting started guide, see jupyter kernelspec list ``` -- Extend JupyterLab through labextension. **Note**: labextension is different from nbextension +* Extend JupyterLab through labextension. **Note**: labextension is different from nbextension from pre-lab jupyter notebooks. All jupyter labextensions need nodejs diff --git a/sdks/python/container/run_generate_requirements.sh b/sdks/python/container/run_generate_requirements.sh index a3b8f618022f..9fb39795b7d2 100755 --- a/sdks/python/container/run_generate_requirements.sh +++ b/sdks/python/container/run_generate_requirements.sh @@ -55,8 +55,7 @@ ENV_PATH="$PWD/build/python${PY_VERSION/./}_requirements_gen" rm -rf $ENV_PATH 2>/dev/null || true python${PY_VERSION} -m venv $ENV_PATH source $ENV_PATH/bin/activate -pip install --upgrade pip -pip install wheel +pip install --upgrade pip setuptools wheel # Install gcp extra deps since these deps are commonly used with Apache Beam. # Install dataframe deps to add have Dataframe support in released images. diff --git a/website/www/site/content/en/documentation/sdks/python-dependencies.md b/website/www/site/content/en/documentation/sdks/python-dependencies.md index 4e0bf0a36c51..32d8ba66067b 100644 --- a/website/www/site/content/en/documentation/sdks/python-dependencies.md +++ b/website/www/site/content/en/documentation/sdks/python-dependencies.md @@ -44,7 +44,7 @@ You can also retrieve the dependency list from the command line using the follow 1. Create a clean virtual environment on your local machine using a supported python version. ``` - $ python -m venv env && source env/bin/activate + $ python3 -m venv env && source ./env/bin/activate && pip install --upgrade pip setuptools wheel ``` 2. [Install the Beam Python SDK](/get-started/quickstart-py/#download-and-install).