From 12de5e6a13902526273dd54e8b2ec7b880a92a97 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 22 Dec 2022 13:16:36 +1100
Subject: [PATCH 001/203] added artifact upload role for oci oss

Signed-off-by: Deepak Devadathan
---
 ansible/artifacts-upload.yml                   | 10 ++++++++++
 .../roles/oci-cloud-storage/defaults/main.yml  |  3 +++
 .../oci-cloud-storage/tasks/delete-folder.yml  |  5 +++++
 .../roles/oci-cloud-storage/tasks/delete.yml   |  7 +++++++
 .../roles/oci-cloud-storage/tasks/download.yml |  7 +++++++
 ansible/roles/oci-cloud-storage/tasks/main.yml | 18 ++++++++++++++++++
 .../oci-cloud-storage/tasks/upload-folder.yml  |  8 ++++++++
 .../roles/oci-cloud-storage/tasks/upload.yml   |  8 ++++++++
 8 files changed, 66 insertions(+)
 create mode 100644 ansible/roles/oci-cloud-storage/defaults/main.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/delete-folder.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/delete.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/download.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/main.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/upload-folder.yml
 create mode 100644 ansible/roles/oci-cloud-storage/tasks/upload.yml

diff --git a/ansible/artifacts-upload.yml b/ansible/artifacts-upload.yml
index 3bdbe73017..3bc192e194 100644
--- a/ansible/artifacts-upload.yml
+++ b/ansible/artifacts-upload.yml
@@ -39,3 +39,13 @@
         aws_access_key_id: "{{ cloud_artifact_storage_accountname }}"
         aws_secret_access_key: "{{ cloud_artifact_storage_secret }}"
       when: cloud_service_provider == "aws"
+
+    - name: upload artifact to oci oss
+      include_role:
+        name: oci-cloud-storage
+        tasks_from: upload.yml
+      vars:
+        local_file_or_folder_path: "{{ artifact_path }}"
+        oss_bucket_name: "{{ cloud_storage_artifacts_bucketname }}"
+        oss_path: "{{ artifact }}"
+      when: cloud_service_provider == "oci"
\ No newline at end of file
diff --git a/ansible/roles/oci-cloud-storage/defaults/main.yml b/ansible/roles/oci-cloud-storage/defaults/main.yml
new file mode 100644
index 0000000000..72727de167
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/defaults/main.yml
@@ -0,0 +1,3 @@
+oss_bucket_name: ""
+oss_path: ""
+local_file_or_folder_path: ""
diff --git a/ansible/roles/oci-cloud-storage/tasks/delete-folder.yml b/ansible/roles/oci-cloud-storage/tasks/delete-folder.yml
new file mode 100644
index 0000000000..6ed4e6b8b4
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/delete-folder.yml
@@ -0,0 +1,5 @@
+---
+- name: delete files and folders recursively
+  shell: "oci os object bulk-delete -ns {{oss_namespace}} -bn {{oss_bucket_name}} --prefix {{oss_path}} --force"
+  async: 3600
+  poll: 10
diff --git a/ansible/roles/oci-cloud-storage/tasks/delete.yml b/ansible/roles/oci-cloud-storage/tasks/delete.yml
new file mode 100644
index 0000000000..65d18843ca
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/delete.yml
@@ -0,0 +1,7 @@
+- name: Ensure oci oss bucket exists
+  command: oci os bucket get --name {{ oss_bucket_name }}
+
+- name: Delete from oci oss bucket
+  command: oci os object delete -bn {{ oss_bucket_name }} --name {{ oss_path }} --force
+  async: 3600
+  poll: 10
\ No newline at end of file
diff --git a/ansible/roles/oci-cloud-storage/tasks/download.yml b/ansible/roles/oci-cloud-storage/tasks/download.yml
new file mode 100644
index 0000000000..63e776c348
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/download.yml
@@ -0,0 +1,7 @@
+- name: Ensure oci oss bucket exists
+  command: oci os bucket get --name {{ oss_bucket_name }}
+
+- name: download files from oci oss bucket
+  command: oci os object bulk-download -bn {{ oss_bucket_name }} --prefix {{ oss_path }} --dest-dir {{ local_file_or_folder_path }}
+  async: 3600
+  poll: 10
\ No newline at end of file
diff --git a/ansible/roles/oci-cloud-storage/tasks/main.yml b/ansible/roles/oci-cloud-storage/tasks/main.yml
new file mode 100644
index 0000000000..6f9dca6b63
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/main.yml
@@ -0,0 +1,18 @@
+---
+- name: delete files from oci oss bucket
+  include: delete.yml
+
+- name: delete folders from oci oss bucket recursively
+  include: delete-folder.yml
+
+
+- name: download file from oss
+  include: download.yml
+
+- name: upload files from a local to oci oss
+  include: upload.yml
+
+- name: upload files and folder from local directory to oci oss
+  include: upload-folder.yml
+
+
diff --git a/ansible/roles/oci-cloud-storage/tasks/upload-folder.yml b/ansible/roles/oci-cloud-storage/tasks/upload-folder.yml
new file mode 100644
index 0000000000..6e4d06562c
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/upload-folder.yml
@@ -0,0 +1,8 @@
+---
+- name: Ensure oci oss bucket exists
+  command: oci os bucket get --name {{ oss_bucket_name }}
+
+- name: Upload folder to oci oss bucket
+  command: oci os object bulk-upload -bn {{ oss_bucket_name }} --prefix {{ oss_path }} --src-dir {{ local_file_or_folder_path }} --content-type auto
+  async: 3600
+  poll: 10
diff --git a/ansible/roles/oci-cloud-storage/tasks/upload.yml b/ansible/roles/oci-cloud-storage/tasks/upload.yml
new file mode 100644
index 0000000000..0edcbc793f
--- /dev/null
+++ b/ansible/roles/oci-cloud-storage/tasks/upload.yml
@@ -0,0 +1,8 @@
+---
+- name: Ensure oci oss bucket exists
+  command: oci os bucket get --name {{ oss_bucket_name }}
+
+- name: Upload to oci oss bucket
+  command: oci os object put -bn {{ oss_bucket_name }} --name {{ oss_path }} --file {{ local_file_or_folder_path }} --content-type auto
+  async: 3600
+  poll: 10
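One gap worth noting in this new role: delete-folder.yml references {{ oss_namespace }}, but defaults/main.yml does not define it, so every caller has to supply the namespace explicitly. A minimal sketch of a more self-contained defaults file, keeping the same variable names:

    # sketch: ansible/roles/oci-cloud-storage/defaults/main.yml with the missing default added
    oss_bucket_name: ""
    oss_path: ""
    local_file_or_folder_path: ""
    oss_namespace: ""   # OCI Object Storage namespace, discoverable via `oci os ns get`
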
From ea2c77890301fa52608fa8b08b67d4085383b68b Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 22 Dec 2022 13:27:38 +1100
Subject: [PATCH 002/203] overwrite file in oss if the file exists

Signed-off-by: Deepak Devadathan
---
 ansible/roles/oci-cloud-storage/tasks/upload.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/oci-cloud-storage/tasks/upload.yml b/ansible/roles/oci-cloud-storage/tasks/upload.yml
index 0edcbc793f..2771da5771 100644
--- a/ansible/roles/oci-cloud-storage/tasks/upload.yml
+++ b/ansible/roles/oci-cloud-storage/tasks/upload.yml
@@ -3,6 +3,6 @@
   command: oci os bucket get --name {{ oss_bucket_name }}
 
 - name: Upload to oci oss bucket
-  command: oci os object put -bn {{ oss_bucket_name }} --name {{ oss_path }} --file {{ local_file_or_folder_path }} --content-type auto
+  command: oci os object put -bn {{ oss_bucket_name }} --name {{ oss_path }} --file {{ local_file_or_folder_path }} --content-type auto --force
   async: 3600
   poll: 10
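The --force matters for unattended runs: without it, `oci os object put` asks for confirmation before replacing an existing object, and the play would hang. The async: 3600 / poll: 10 pair used throughout the role keeps each task synchronous but caps it at an hour, polling every 10 seconds; if a non-blocking upload were ever preferred, a sketch along these lines (task names hypothetical) should work:

    - name: Upload to oci oss bucket without blocking (sketch)
      command: oci os object put -bn {{ oss_bucket_name }} --name {{ oss_path }} --file {{ local_file_or_folder_path }} --force
      async: 3600
      poll: 0
      register: upload_job

    - name: Wait for the background upload to finish
      async_status:
        jobid: "{{ upload_job.ansible_job_id }}"
      register: job_result
      until: job_result.finished
      retries: 360
      delay: 10
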
From 852a610e155d000a0151ffd7f2e53798be4d5a43 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 10:31:33 +1100
Subject: [PATCH 003/203] added artifact download for oci role

Signed-off-by: Deepak Devadathan
---
 ansible/artifacts-download.yml                     | 11 +++++++++++
 ansible/roles/oci-cloud-storage/tasks/download.yml |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/ansible/artifacts-download.yml b/ansible/artifacts-download.yml
index 9db0efb42f..e216c10999 100644
--- a/ansible/artifacts-download.yml
+++ b/ansible/artifacts-download.yml
@@ -38,3 +38,14 @@
         aws_access_key_id: "{{ cloud_artifact_storage_accountname }}"
         aws_secret_access_key: "{{ cloud_artifact_storage_secret }}"
       when: cloud_service_provider == "aws"
+
+
+    - name: download artifact from oci oss
+      include_role:
+        name: oci-cloud-storage
+        tasks_from: download.yml
+      vars:
+        local_file_or_folder_path: "{{ artifact_path }}"
+        oss_bucket_name: "{{ cloud_storage_artifacts_bucketname }}"
+        oss_object_name: "{{ artifact }}"
+      when: cloud_service_provider == "oci"
diff --git a/ansible/roles/oci-cloud-storage/tasks/download.yml b/ansible/roles/oci-cloud-storage/tasks/download.yml
index 63e776c348..bb32e9ed93 100644
--- a/ansible/roles/oci-cloud-storage/tasks/download.yml
+++ b/ansible/roles/oci-cloud-storage/tasks/download.yml
@@ -2,6 +2,6 @@
   command: oci os bucket get --name {{ oss_bucket_name }}
 
 - name: download files from oci oss bucket
-  command: oci os object bulk-download -bn {{ oss_bucket_name }} --prefix {{ oss_path }} --dest-dir {{ local_file_or_folder_path }}
+  command: oci os object get -bn {{ oss_bucket_name }} --name {{ oss_object_name }} --file {{ local_file_or_folder_path }}
   async: 3600
-  poll: 10
\ No newline at end of file
+  poll: 10

From 3ee3b85664a41de53e72722ec6f2e7362c43e8c9 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 13:28:37 +1100
Subject: [PATCH 004/203] added role to install oci-cli

Signed-off-by: Deepak Devadathan
---
 ansible/roles/oci-cli/defaults/main.yml |  1 +
 ansible/roles/oci-cli/tasks/main.yml    | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 ansible/roles/oci-cli/defaults/main.yml
 create mode 100644 ansible/roles/oci-cli/tasks/main.yml

diff --git a/ansible/roles/oci-cli/defaults/main.yml b/ansible/roles/oci-cli/defaults/main.yml
new file mode 100644
index 0000000000..147a2e03f1
--- /dev/null
+++ b/ansible/roles/oci-cli/defaults/main.yml
@@ -0,0 +1 @@
+oci_cli_url: https://github.com/oracle/oci-cli/releases/download/v3.22.0/oci-cli-3.22.0-Ubuntu-18.04-Offline.zip
\ No newline at end of file
diff --git a/ansible/roles/oci-cli/tasks/main.yml b/ansible/roles/oci-cli/tasks/main.yml
new file mode 100644
index 0000000000..f3d5ad29b8
--- /dev/null
+++ b/ansible/roles/oci-cli/tasks/main.yml
@@ -0,0 +1,24 @@
+---
+- name: Download the installation file
+  get_url:
+    url: "{{ oci_cli_url }}"
+    dest: /tmp/ocicli.zip
+
+- name: Install zip and unzip
+  apt:
+    name: "{{item}}"
+    state: latest
+  with_items:
+    - zip
+    - unzip
+
+- name: Unzip the installer
+  unarchive:
+    src: /tmp/ocicli.zip
+    dest: /tmp/
+    remote_src: yes
+
+- name: install oci cli
+  shell: ./oci-cli-installation/install.sh --accept-all-defaults
+  args:
+    chdir: /tmp/
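The offline bundle is expected to unzip into an oci-cli-installation/ directory, which the install task's chdir relies on. A post-install smoke test could be appended to the role, along these lines (path assumed to be the installer's default exec dir, which a later patch makes explicit):

    - name: verify the oci cli is callable (sketch)
      command: "{{ ansible_env.HOME }}/bin/oci --version"
      changed_when: false
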
From 3ee652e6f582bed23d13d0c1fef5e43db1100238 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 13:33:40 +1100
Subject: [PATCH 005/203] added csp choice based cloud cli installation

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-always/meta/main.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/analytics-bootstrap-always/meta/main.yml b/ansible/roles/analytics-bootstrap-always/meta/main.yml
index af15826aef..3566881eff 100644
--- a/ansible/roles/analytics-bootstrap-always/meta/main.yml
+++ b/ansible/roles/analytics-bootstrap-always/meta/main.yml
@@ -1,6 +1,7 @@
 ---
 dependencies:
   - { role: jdk11 , become: yes }
-  - { role: azure-cli , become: yes }
+  - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
+  - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
 
 

From 4ffea4d3aeea58ccf2bef6a8561b19aa7531e125 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 13:48:27 +1100
Subject: [PATCH 006/203] added env variables for oci cli for analytics user

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-spark/tasks/main.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index 13ba75f78a..e6f138edd2 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -10,6 +10,11 @@
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}
     - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'}
+    - {var: 'OCI_CLI_USER', value: '{{ oci_cli_user_ocid }}'}
+    - {var: 'OCI_CLI_REGION', value: '{{ oci_cli_region }}'}
+    - {var: 'OCI_CLI_FINGERPRINT', value: '{{ oci_cli_fingerprint }}'}
+    - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
+    - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
 
 - name: Adding ENV Vars to spark servers environment.
   become: yes

From 8a70a47ce21d491e14a799253ec855291c305286 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 14:05:39 +1100
Subject: [PATCH 007/203] install oci cli as analytics user

Signed-off-by: Deepak Devadathan
---
 ansible/roles/oci-cli/tasks/main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ansible/roles/oci-cli/tasks/main.yml b/ansible/roles/oci-cli/tasks/main.yml
index f3d5ad29b8..68adfa6077 100644
--- a/ansible/roles/oci-cli/tasks/main.yml
+++ b/ansible/roles/oci-cli/tasks/main.yml
@@ -19,6 +19,8 @@
     remote_src: yes
 
 - name: install oci cli
+  become: yes
+  become_user: "{{ analytics_user }}"
   shell: ./oci-cli-installation/install.sh --accept-all-defaults
   args:
     chdir: /tmp/

From 69f2e83a79fb80d5f798ed8728827d4fcdb3871f Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 14:11:44 +1100
Subject: [PATCH 008/203] adding oci cli in PATH variable for analytics user

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-spark/tasks/main.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index e6f138edd2..bdd164fc23 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -16,6 +16,15 @@
     - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
     - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
 
+- name: Adding PATH for oci cli Vars to bashrc file of spark.
+  become: yes
+  become_user: "{{ analytics_user }}"
+  lineinfile:
+    path: '{{ analytics_user_home }}/.bashrc'
+    line: 'export PATH={{ analytics_user_home }}/bin:$PATH'
+    regexp: "export PATH={{ analytics_user_home }}/bin.*"
+  when: cloud_service_provider == "oci"
+
 - name: Adding ENV Vars to spark servers environment.
   become: yes
   lineinfile:
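OCI_CLI_USER, OCI_CLI_REGION, OCI_CLI_FINGERPRINT, OCI_CLI_TENANCY and OCI_CLI_KEY_CONTENT are environment variables the OCI CLI recognizes as an alternative to ~/.oci/config, so exporting them from .bashrc lets the analytics user run oci with no config file (patch 013 later switches to a config file, which handles the multi-line private key more cleanly). A quick authenticated smoke test once they are in place might look like this sketch:

    - name: confirm oci auth works for the analytics user (sketch)
      become: yes
      become_user: "{{ analytics_user }}"
      command: "{{ analytics_user_home }}/bin/oci os ns get"
      changed_when: false
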
From 4ce637daf46712d23065780a923ebf535856e014 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 14:39:57 +1100
Subject: [PATCH 009/203] install oci-cli in analytics home location

Signed-off-by: Deepak Devadathan
---
 ansible/roles/oci-cli/tasks/main.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ansible/roles/oci-cli/tasks/main.yml b/ansible/roles/oci-cli/tasks/main.yml
index 68adfa6077..389a9e8235 100644
--- a/ansible/roles/oci-cli/tasks/main.yml
+++ b/ansible/roles/oci-cli/tasks/main.yml
@@ -19,8 +19,6 @@
     remote_src: yes
 
 - name: install oci cli
-  become: yes
-  become_user: "{{ analytics_user }}"
-  shell: ./oci-cli-installation/install.sh --accept-all-defaults
+  shell: ./oci-cli-installation/install.sh --install-dir {{ analytics_user_home }} --exec-dir {{ analytics_user_home }} --script-dir {{ analytics_user_home }} --accept-all-defaults
   args:
     chdir: /tmp/

From 273337473fc1c332b3f10e615490cafdbbf059e1 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 15:41:29 +1100
Subject: [PATCH 010/203] disabled cloud cli temporarily

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-always/meta/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/analytics-bootstrap-always/meta/main.yml b/ansible/roles/analytics-bootstrap-always/meta/main.yml
index 3566881eff..0fd0637734 100644
--- a/ansible/roles/analytics-bootstrap-always/meta/main.yml
+++ b/ansible/roles/analytics-bootstrap-always/meta/main.yml
@@ -1,7 +1,7 @@
 ---
 dependencies:
   - { role: jdk11 , become: yes }
-  - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
-  - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
+  # - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
+  # - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
 
 

From 2e4cf614a94eb53dd25530876ea01b0bc9273397 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 15:42:44 +1100
Subject: [PATCH 011/203] setting env variable with double quotes

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-spark/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index bdd164fc23..cd0cc563d8 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -5,7 +5,7 @@
   become_user: "{{ analytics_user }}"
   lineinfile:
     path: '{{ analytics_user_home }}/.bashrc'
-    line: 'export {{item.var}}={{item.value}}'
+    line: 'export {{item.var}}="{{item.value}}"'
    regexp: "export {{ item.var }}.*"
From 2bbab172de10422a638140db1b3cdc6dd7cbd6bc Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 16:34:14 +1100
Subject: [PATCH 012/203] placed oci cli env variables in /etc/environment file

Signed-off-by: Deepak Devadathan
---
 .../analytics-bootstrap-spark/tasks/main.yml | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index cd0cc563d8..a16122fecf 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -5,16 +5,12 @@
   become_user: "{{ analytics_user }}"
   lineinfile:
     path: '{{ analytics_user_home }}/.bashrc'
-    line: 'export {{item.var}}="{{item.value}}"'
+    line: 'export {{item.var}}={{item.value}}'
     regexp: "export {{ item.var }}.*"
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}
     - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'}
-    - {var: 'OCI_CLI_USER', value: '{{ oci_cli_user_ocid }}'}
-    - {var: 'OCI_CLI_REGION', value: '{{ oci_cli_region }}'}
-    - {var: 'OCI_CLI_FINGERPRINT', value: '{{ oci_cli_fingerprint }}'}
-    - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
-    - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
+
 
 - name: Adding PATH for oci cli Vars to bashrc file of spark.
   become: yes
@@ -29,7 +25,7 @@
   become: yes
   lineinfile:
     path: '/etc/environment'
-    line: '{{item.var}}={{item.value}}'
+    line: '{{item.var}}="{{item.value}}"'
     regexp: "{{ item.var }}.*"
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}
@@ -45,6 +41,11 @@
     - {var: 'STORAGE_PROVIDER', value: 'AZURE'}
     - {var: 'ENV', value: '{{env}}'}
     - {var: 'KAFKA_BROKER_HOST', value: "{{groups['processing-cluster-kafka'][0]}}:9092"}
+    - {var: 'OCI_CLI_USER', value: '{{ oci_cli_user_ocid }}'}
+    - {var: 'OCI_CLI_REGION', value: '{{ oci_cli_region }}'}
+    - {var: 'OCI_CLI_FINGERPRINT', value: '{{ oci_cli_fingerprint }}'}
+    - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
+    - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
 
 - name: Install required python packages
   become: yes
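A note on /etc/environment: it is parsed by pam_env rather than a shell, so each line is taken literally, no variable expansion happens, and surrounding quotes are typically stripped, which explains the back-and-forth quoting between .bashrc and this file in patches 011 and 012. A multi-line value such as the PEM key in OCI_CLI_KEY_CONTENT cannot be represented there at all, which the next patch resolves by writing a ~/.oci config instead. Since lineinfile tasks like these echo secret values into play output, a hardened variant might also add no_log (sketch; the consolidated list variable is hypothetical):

    - name: Adding ENV Vars to spark servers environment.
      become: yes
      lineinfile:
        path: /etc/environment
        line: '{{ item.var }}="{{ item.value }}"'
        regexp: "{{ item.var }}.*"
      no_log: true                          # keep credential values out of play output
      with_items: "{{ spark_env_vars }}"    # hypothetical consolidated list
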
From d0d1b1d238d198f09e792802753de67e2f559d4d Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 16:50:55 +1100
Subject: [PATCH 013/203] added jinja2 template for oci cli config

Signed-off-by: Deepak Devadathan
---
 .../analytics-bootstrap-spark/tasks/main.yml  | 27 ++++++++++++++-----
 .../template/oci-cli-config.j2                |  6 +++++
 .../template/oci-key.j2                       |  1 +
 3 files changed, 28 insertions(+), 6 deletions(-)
 create mode 100644 ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2
 create mode 100644 ansible/roles/analytics-bootstrap-spark/template/oci-key.j2

diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
index a16122fecf..663f76d68a 100644
--- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
+++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml
@@ -21,11 +21,31 @@
     regexp: "export PATH={{ analytics_user_home }}/bin.*"
   when: cloud_service_provider == "oci"
 
+- name: Configure OCI cli
+  become: yes
+  become_user: "{{ analytics_user }}"
+  file:
+    path: "{{ analytics_user_home }}/.oci"
+    state: directory
+  when: cloud_service_provider == "oci"
+
+- name: Create OCI cli config location
+  become: yes
+  become_user: "{{ analytics_user }}"
+  template: src=oci-key.j2 dest={{ analytics_user_home }}/.oci/oci-key.pem mode=600 owner={{ analytics_user }} group={{ analytics_group }}
+  when: cloud_service_provider == "oci"
+
+- name: Create OCI cli config file
+  become: yes
+  become_user: "{{ analytics_user }}"
+  template: src=oci-cli-config.j2 dest={{ analytics_user_home }}/.oci/config mode=600 owner={{ analytics_user }} group={{ analytics_group }}
+  when: cloud_service_provider == "oci"
+
 - name: Adding ENV Vars to spark servers environment.
   become: yes
   lineinfile:
     path: '/etc/environment'
-    line: '{{item.var}}="{{item.value}}"'
+    line: '{{item.var}}={{item.value}}'
     regexp: "{{ item.var }}.*"
   with_items:
     - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'}
@@ -41,11 +61,6 @@
     - {var: 'STORAGE_PROVIDER', value: 'AZURE'}
     - {var: 'ENV', value: '{{env}}'}
     - {var: 'KAFKA_BROKER_HOST', value: "{{groups['processing-cluster-kafka'][0]}}:9092"}
-    - {var: 'OCI_CLI_USER', value: '{{ oci_cli_user_ocid }}'}
-    - {var: 'OCI_CLI_REGION', value: '{{ oci_cli_region }}'}
-    - {var: 'OCI_CLI_FINGERPRINT', value: '{{ oci_cli_fingerprint }}'}
-    - {var: 'OCI_CLI_TENANCY', value: '{{ oci_cli_tenancy }}'}
-    - {var: 'OCI_CLI_KEY_CONTENT', value: '{{ oci_cli_key_content }}'}
 
 - name: Install required python packages
   become: yes
diff --git a/ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2 b/ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2
new file mode 100644
index 0000000000..56cf3ba3ef
--- /dev/null
+++ b/ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2
@@ -0,0 +1,6 @@
+[DEFAULT]
+user={{oci_cli_user_ocid }}
+fingerprint={{oci_cli_fingerprint}}
+key_file=/home/analytics/.oci/oci-key.pem
+tenancy={{oci_cli_tenancy}}
+region={{oci_cli_region}}
\ No newline at end of file
diff --git a/ansible/roles/analytics-bootstrap-spark/template/oci-key.j2 b/ansible/roles/analytics-bootstrap-spark/template/oci-key.j2
new file mode 100644
index 0000000000..b969594016
--- /dev/null
+++ b/ansible/roles/analytics-bootstrap-spark/template/oci-key.j2
@@ -0,0 +1 @@
+{{ oci_cli_key_content }}
\ No newline at end of file

From cc11113213dd522c43f0a6365ff844666d4027c6 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 16:58:28 +1100
Subject: [PATCH 014/203] renamed dir template to templates

Signed-off-by: Deepak Devadathan
---
 .../{template => templates}/oci-cli-config.j2                   | 0
 .../analytics-bootstrap-spark/{template => templates}/oci-key.j2 | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename ansible/roles/analytics-bootstrap-spark/{template => templates}/oci-cli-config.j2 (100%)
 rename ansible/roles/analytics-bootstrap-spark/{template => templates}/oci-key.j2 (100%)

diff --git a/ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2 b/ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2
similarity index 100%
rename from ansible/roles/analytics-bootstrap-spark/template/oci-cli-config.j2
rename to ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2
diff --git a/ansible/roles/analytics-bootstrap-spark/template/oci-key.j2 b/ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2
similarity index 100%
rename from ansible/roles/analytics-bootstrap-spark/template/oci-key.j2
rename to ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2
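The rename matters because Ansible resolves a bare src on the template module against the role's templates/ directory; the files created under template/ in patch 013 would not have been found. The layout the template lookups assume:

    # role layout expected by the template module
    # analytics-bootstrap-spark/
    #   tasks/main.yml
    #   templates/
    #     oci-cli-config.j2
    #     oci-key.j2
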
From 2e11eaf0677df8793be6d4737ffcbabcbe7092ca Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 17:03:58 +1100
Subject: [PATCH 015/203] disabled only oci cli temporarily

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-always/meta/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/analytics-bootstrap-always/meta/main.yml b/ansible/roles/analytics-bootstrap-always/meta/main.yml
index 0fd0637734..e04443fb98 100644
--- a/ansible/roles/analytics-bootstrap-always/meta/main.yml
+++ b/ansible/roles/analytics-bootstrap-always/meta/main.yml
@@ -1,7 +1,7 @@
 ---
 dependencies:
   - { role: jdk11 , become: yes }
-  # - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
+  - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
   # - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
 
 

From 054d9dbd233efd46a688af4a640c065188d5dd1e Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Fri, 23 Dec 2022 17:06:01 +1100
Subject: [PATCH 016/203] testing idempotency for oci cli

Signed-off-by: Deepak Devadathan
---
 ansible/roles/analytics-bootstrap-always/meta/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/analytics-bootstrap-always/meta/main.yml b/ansible/roles/analytics-bootstrap-always/meta/main.yml
index e04443fb98..3566881eff 100644
--- a/ansible/roles/analytics-bootstrap-always/meta/main.yml
+++ b/ansible/roles/analytics-bootstrap-always/meta/main.yml
@@ -2,6 +2,6 @@
 dependencies:
   - { role: jdk11 , become: yes }
   - { role: azure-cli , become: yes, when: cloud_service_provider == "azure" }
-  # - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
+  - { role: oci-cli , become: yes, when: cloud_service_provider == "oci" }
 
 

From b1de73736c5147f67c41ff9e860b2f939b2d56da Mon Sep 17 00:00:00 2001
From: Kenneth Heung
Date: Sun, 25 Dec 2022 13:43:11 +0800
Subject: [PATCH 017/203] change Spark provision task with Ruby 2.6

In release 4.6.0, Ruby 2.6 is used. It is unclear why Ruby became 2.2 in
4.10 / 5.0; in fact, there is no ruby2.2-dev for bionic. Changing to
Ruby 2.6 to proceed.
---
 ansible/roles/analytics-spark-provision/tasks/main.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/analytics-spark-provision/tasks/main.yml b/ansible/roles/analytics-spark-provision/tasks/main.yml
index 01eef6c9e1..65731cecb9 100644
--- a/ansible/roles/analytics-spark-provision/tasks/main.yml
+++ b/ansible/roles/analytics-spark-provision/tasks/main.yml
@@ -117,28 +117,31 @@
     recurse: yes
   become: yes
 
+# kenneth changed to install Ruby 2.6 as per R.4.6.0
 - name: Install latest ruby
   become: yes
   become_user: "{{ analytics_user }}"
-  shell: "export PATH=$PATH:/home/analytics/.rvm/bin && rvm install ruby-2.2"
+  shell: "export PATH=$PATH:/home/analytics/.rvm/bin && rvm install ruby-2.6"
 
 - name: Add ruby repository
   become: yes
   apt_repository:
     repo: ppa:brightbox/ruby-ng
 
+# kenneth changed to install ruby-dev 2.6 as per R.4.6.0 - there is no ruby2.2-dev in bionic
 - name: Install latest ruby-dev
   become: yes
   apt:
-    name: "ruby2.2-dev"
+    name: "ruby2.6-dev"
    state: installed
     update_cache: true
     cache_valid_time: 3600
 
+# changed to ruby 2.6 as per R.4.6.0
 - name: Install ruby-kafka
   become: yes
   become_user: "{{ analytics_user }}"
-  shell: "bash -ilc 'export PATH=$PATH:/home/analytics/.rvm/bin && rvm --default use ruby-2.2 && gem install ruby-kafka'"
+  shell: "bash -ilc 'export PATH=$PATH:/home/analytics/.rvm/bin && rvm --default use ruby-2.6 && gem install --user-install --no-document ruby-kafka'"
 
 - name: Download Kafka-2.11
   become: yes
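On the gem flags: --user-install writes the gem under the user's home gem directory rather than the rvm ruby's global gem path, which avoids permission issues, and --no-document skips rdoc/ri generation to speed provisioning. A related modernization worth considering while touching this file: apt's state: installed is a legacy alias, with present preferred in current Ansible (sketch):

    - name: Install latest ruby-dev
      become: yes
      apt:
        name: ruby2.6-dev
        state: present            # `installed` is an older alias for `present`
        update_cache: true
        cache_valid_time: 3600
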
From 07d4e5e06f4348d4a2aac72a7bd2533b94929ff5 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 28 Dec 2022 00:56:20 +1100
Subject: [PATCH 018/203] added apiversion selector

Signed-off-by: Deepak Devadathan
---
 .../helm_charts/bootstrap/reloader/templates/clusterrole.yaml | 4 ++++
 .../bootstrap/reloader/templates/clusterrolebinding.yaml      | 4 ++++
 kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml | 4 ++++
 .../helm_charts/bootstrap/reloader/templates/rolebinding.yaml | 4 ++++
 4 files changed, 16 insertions(+)

diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml
index 8d51ef406b..b2817c5f9a 100755
--- a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml
+++ b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrole.yaml
@@ -1,5 +1,9 @@
 {{- if and .Values.reloader.watchGlobally (.Values.reloader.rbac.enabled) }}
+{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }}
+apiVersion: rbac.authorization.k8s.io/v1
+{{ else }}
 apiVersion: rbac.authorization.k8s.io/v1beta1
+{{- end }}
 kind: ClusterRole
 metadata:
   labels:
diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml
index 28c9d4b916..748e52528d 100755
--- a/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml
+++ b/kubernetes/helm_charts/bootstrap/reloader/templates/clusterrolebinding.yaml
@@ -1,5 +1,9 @@
 {{- if and .Values.reloader.watchGlobally (.Values.reloader.rbac.enabled) }}
+{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }}
+apiVersion: rbac.authorization.k8s.io/v1
+{{ else }}
 apiVersion: rbac.authorization.k8s.io/v1beta1
+{{- end }}
 kind: ClusterRoleBinding
 metadata:
   labels:
diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml
index 5827f5cdcb..b654024031 100755
--- a/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml
+++ b/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml
@@ -1,5 +1,9 @@
 {{- if and (not (.Values.reloader.watchGlobally)) (.Values.reloader.rbac.enabled) }}
+{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }}
+apiVersion: rbac.authorization.k8s.io/v1
+{{ else }}
 apiVersion: rbac.authorization.k8s.io/v1beta1
+{{- end }}
 kind: Role
 metadata:
   labels:
diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml
index 94fb1f838b..d915db304d 100755
--- a/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml
+++ b/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml
@@ -1,5 +1,9 @@
 {{- if and (not (.Values.reloader.watchGlobally)) (.Values.reloader.rbac.enabled) }}
+{{- if (.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1") }}
+apiVersion: rbac.authorization.k8s.io/v1
+{{ else }}
 apiVersion: rbac.authorization.k8s.io/v1beta1
+{{- end }}
 kind: RoleBinding
 metadata:
   labels:
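Context for the selector: rbac.authorization.k8s.io/v1beta1 was removed in Kubernetes 1.22, so on newer clusters these objects must be emitted as v1, while the Capabilities probe keeps older clusters working. On a current cluster the template reduces to:

    # effective output on a cluster that serves rbac v1 (sketch)
    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole

One caveat: offline `helm template` rendering only knows a default set of API versions, so validating against the target cluster (for example with --validate) is the reliable way to exercise the v1 branch.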
"oci" + tags: - always From 5fa0e0040d6414ce302c373ed0a94133d368c65b Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 29 Dec 2022 16:46:46 +1100 Subject: [PATCH 020/203] added full path of oci from analaytics home Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 9ac97b4d43..0921ff5531 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -4,7 +4,7 @@ when: dp_object_store_type == "azure" - name: Ensure oci oss bucket exists - command: oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}} + command: "{{ analytics.home }}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" when: dp_object_store_type == "s3" and cloud_service_provider == "oci" tags: From 329e8200a63f9ac2d152c5e22b9329bb343f33ad Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 29 Dec 2022 16:48:37 +1100 Subject: [PATCH 021/203] correction in oci cli location Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 0921ff5531..ab31c09e87 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -4,7 +4,7 @@ when: dp_object_store_type == "azure" - name: Ensure oci oss bucket exists - command: "{{ analytics.home }}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" when: dp_object_store_type == "s3" and cloud_service_provider == "oci" tags: From 86b024d326716aa5a48f2bb1d280dae8009ad2f6 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 29 Dec 2022 16:54:28 +1100 Subject: [PATCH 022/203] check bucket existence before creating Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index ab31c09e87..a0977df794 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -4,8 +4,13 @@ when: dp_object_store_type == "azure" - name: Ensure oci oss bucket exists - command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + command: oci os bucket get --name {{ bucket }} when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + register: check_bucket + +- name: Ensure oci oss bucket exists + command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + when: dp_object_store_type == "s3" and cloud_service_provider == "oci" and check_bucket.rc !=0 tags: - always From 51894dbbd89174cf3e4a77fc206d4283570d90ae Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 29 Dec 2022 19:02:49 +1100 Subject: [PATCH 023/203] testing the logic for register variable Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
From 51894dbbd89174cf3e4a77fc206d4283570d90ae Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:02:49 +1100
Subject: [PATCH 023/203] testing the logic for register variable

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index a0977df794..7a836a8bf2 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -5,8 +5,9 @@
 
 - name: Ensure oci oss bucket exists
   command: oci os bucket get --name {{ bucket }}
-  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
   register: check_bucket
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+
 
 - name: Ensure oci oss bucket exists
   command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"

From cbb93c158b26b822c017a7773f8da01e207e4519 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:04:37 +1100
Subject: [PATCH 024/203] using full path for oci cli

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 7a836a8bf2..2053e72ded 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -4,11 +4,10 @@
   when: dp_object_store_type == "azure"
 
 - name: Ensure oci oss bucket exists
-  command: oci os bucket get --name {{ bucket }}
+  command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}"
   register: check_bucket
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
-
 
 - name: Ensure oci oss bucket exists
   command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"

From 47ccff0153da65ad3881350fb3b396083eac3d0b Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:06:45 +1100
Subject: [PATCH 025/203] testing the bucket check

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 2053e72ded..b677d2f13d 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -8,7 +8,7 @@
   register: check_bucket
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
 
-- name: Ensure oci oss bucket exists
+- name: Create oci oss bucket
   command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}"
   when: dp_object_store_type == "s3" and cloud_service_provider == "oci" and check_bucket.rc !=0
"azure" - + tags: + - always + - name: Ensure oci oss bucket exists command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" register: check_bucket when: dp_object_store_type == "s3" and cloud_service_provider == "oci" - + tags: + - always + - name: Create oci oss bucket command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" when: dp_object_store_type == "s3" and cloud_service_provider == "oci" and check_bucket.rc !=0 - tags: - always From 3c7931040aa6ee5f577c328f77a32663ba793325 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 29 Dec 2022 19:23:05 +1100 Subject: [PATCH 027/203] place an oci os upload command for every upload Signed-off-by: Deepak Devadathan --- .../roles/data-products-deploy/tasks/main.yml | 49 ++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 28efbd3516..4d454f9d6f 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -4,7 +4,7 @@ when: dp_object_store_type == "azure" tags: - always - + - name: Ensure oci oss bucket exists command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" register: check_bucket @@ -27,6 +27,15 @@ command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} async: 3600 poll: 10 + when: dp_object_store_type == "azure" + tags: + - dataproducts-spark-cluster + +- name: Copy Core Data Products to oci oss + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force + async: 3600 + poll: 10 + when: dp_object_store_type == "s3" and cloud_service_provider == "oci" tags: - dataproducts-spark-cluster @@ -40,8 +49,17 @@ command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar -f {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar async: 3600 poll: 10 + when: dp_object_store_type == "azure" tags: - - ed-dataproducts-spark-cluster + - ed-dataproducts-spark-cluster + +- name: Copy Ed Data Products to oci oss + command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force + async: 3600 + poll: 10 + when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + tags: + - ed-dataproducts-spark-cluster - name: Copy Framework Library copy: src={{ analytics_core_artifact }} dest={{ analytics.home }}/models-{{ model_version }} @@ -52,6 +70,15 @@ command: az storage blob upload --overwrite --debug -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} async: 3600 poll: 10 + when: dp_object_store_type == "azure" + tags: + - framework-spark-cluster + +- name: Copy Framework Library to oci oss + command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ 
From 3c7931040aa6ee5f577c328f77a32663ba793325 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:23:05 +1100
Subject: [PATCH 027/203] place an oci os upload command for every upload

Signed-off-by: Deepak Devadathan
---
 .../roles/data-products-deploy/tasks/main.yml | 49 ++++++++++++++++++-
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 28efbd3516..4d454f9d6f 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -4,7 +4,7 @@
   when: dp_object_store_type == "azure"
   tags:
     - always
-
+
 - name: Ensure oci oss bucket exists
   command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}"
   register: check_bucket
@@ -27,6 +27,15 @@
   command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
   tags:
     - dataproducts-spark-cluster
+
+- name: Copy Core Data Products to oci oss
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+  tags:
+    - dataproducts-spark-cluster
@@ -40,8 +49,17 @@
   command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar -f {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
   tags:
-    - ed-dataproducts-spark-cluster
+    - ed-dataproducts-spark-cluster
+
+- name: Copy Ed Data Products to oci oss
+  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+  tags:
+    - ed-dataproducts-spark-cluster
 
 - name: Copy Framework Library
   copy: src={{ analytics_core_artifact }} dest={{ analytics.home }}/models-{{ model_version }}
@@ -52,6 +70,15 @@
   command: az storage blob upload --overwrite --debug -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }}
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
   tags:
     - framework-spark-cluster
+
+- name: Copy Framework Library to oci oss
+  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+  tags:
+    - framework-spark-cluster
 
 - name: Copy Scruid Library
   copy: src={{ scruid_artifact }} dest={{ analytics.home }}/models-{{ model_version }}
@@ -64,6 +91,15 @@
   command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }}
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
   tags:
     - framework-spark-cluster
+
+- name: Copy Scruid Library to oci oss
+  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+  tags:
+    - framework-spark-cluster
@@ -113,9 +149,18 @@
   command: az storage blob upload --overwrite -c {{ bucket }} -f {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf
   async: 3600
   poll: 10
+  when: dp_object_store_type == "azure"
   tags:
     - framework-spark-cluster
 
+- name: Copy configuration file to oci oss
+  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "s3" and cloud_service_provider == "oci"
+  tags:
+    - framework-spark-cluster
+
 - name: Copy log4j2 xml file
   template: src=log4j2.xml.j2 dest={{ analytics.home }}/models-{{ model_version }}/log4j2.xml mode=755 owner={{ analytics_user }} group={{ analytics_group }}
   tags: [ dataproducts, framework, ed-dataproducts ]

From 5e4749ecb56cb6d0daea3f89f01d20cf95e3e026 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:24:34 +1100
Subject: [PATCH 028/203] corrected typo for oci cli command

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 4d454f9d6f..2f2f3a8063 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -75,7 +75,7 @@
     - framework-spark-cluster
 
 - name: Copy Framework Library to oci oss
-  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force
   async: 3600
   poll: 10
From b7daec66ba39c5ce62ca2f20d4150c0365d125a7 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Thu, 29 Dec 2022 19:27:55 +1100
Subject: [PATCH 029/203] corrected typo for oci cli command

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/tasks/main.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml
index 2f2f3a8063..f4cbf7c216 100644
--- a/ansible/roles/data-products-deploy/tasks/main.yml
+++ b/ansible/roles/data-products-deploy/tasks/main.yml
@@ -54,7 +54,7 @@
     - ed-dataproducts-spark-cluster
 
 - name: Copy Ed Data Products to oci oss
-  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force
   async: 3600
   poll: 10
@@ -96,7 +96,7 @@
     - framework-spark-cluster
 
 - name: Copy Scruid Library to oci oss
-  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force
   async: 3600
   poll: 10
@@ -154,7 +154,7 @@
     - framework-spark-cluster
 
 - name: Copy configuration file to oci oss
-  command: /home/{{analytics_user}}/bin/oci os os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force
+  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force
   async: 3600
   poll: 10
From 1c56877438d17bfab6617fea64cc007ebd0a73b5 Mon Sep 17 00:00:00 2001
From: santhosh-tg
Date: Fri, 30 Dec 2022 10:37:23 +0530
Subject: [PATCH 030/203] Update ES roles

---
 ansible/roles/es-azure-snapshot/defaults/main.yml | 12 +++---------
 ansible/roles/es-gcs-snapshot/defaults/main.yml   |  8 +++++---
 ansible/roles/es-s3-snapshot/defaults/main.yml    |  8 +++++---
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/ansible/roles/es-azure-snapshot/defaults/main.yml b/ansible/roles/es-azure-snapshot/defaults/main.yml
index 39ca274c0e..de88d89398 100644
--- a/ansible/roles/es-azure-snapshot/defaults/main.yml
+++ b/ansible/roles/es-azure-snapshot/defaults/main.yml
@@ -1,7 +1,7 @@
 snapshot_create_request_body: {
   type: azure,
   settings: {
-    container: "{{ es_backup_storage }}",
+    container: "{{ cloud_storage_esbackup_foldername }}",
     base_path: "{{ snapshot_base_path }}_{{ base_path_date }}"
   }
 }
@@ -10,12 +10,6 @@
 es_snapshot_host: "localhost"
 snapshot_base_path: "default"
 
-es_azure_backup_container_name: "elasticsearch-snapshots"
-
-# This variable is added for the below reason -
-# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name
-# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo
-#    or other default files and just assign the value to the newly introduced common variable
-# 3. After few releases, we will remove the older variables and use only the new variables across the repos
-es_backup_storage: "{{ es_azure_backup_container_name }}"
+cloud_storage_esbackup_bucketname: "{{ cloud_storage_management_bucketname }}"
+cloud_storage_esbackup_foldername: "elasticsearch-snapshots"
diff --git a/ansible/roles/es-gcs-snapshot/defaults/main.yml b/ansible/roles/es-gcs-snapshot/defaults/main.yml
index 5e3cbece6f..7222b0c06b 100644
--- a/ansible/roles/es-gcs-snapshot/defaults/main.yml
+++ b/ansible/roles/es-gcs-snapshot/defaults/main.yml
@@ -1,12 +1,14 @@
 snapshot_create_request_body: {
   type: gcs,
   settings: {
-    bucket: "{{ gcs_management_bucket_name }}",
-    base_path: "{{ es_backup_storage }}/{{ snapshot_base_path }}_{{ base_path_date }}"
+    bucket: "{{ cloud_storage_management_bucketname }}",
+    base_path: "{{ cloud_storage_esbackup_foldername }}/{{ snapshot_base_path }}_{{ base_path_date }}"
   }
 }
 
 # Override these values
 es_snapshot_host: "localhost"
 snapshot_base_path: "default"
-es_backup_storage: "elasticsearch-snapshots"
\ No newline at end of file
+
+cloud_storage_esbackup_bucketname: "{{ cloud_storage_management_bucketname }}"
+cloud_storage_esbackup_foldername: "elasticsearch-snapshots"
diff --git a/ansible/roles/es-s3-snapshot/defaults/main.yml b/ansible/roles/es-s3-snapshot/defaults/main.yml
index 7ddda6ebd0..316ae512fb 100644
--- a/ansible/roles/es-s3-snapshot/defaults/main.yml
+++ b/ansible/roles/es-s3-snapshot/defaults/main.yml
@@ -1,12 +1,14 @@
 snapshot_create_request_body: {
   type: s3,
   settings: {
-    bucket: "{{ aws_management_bucket_name }}",
-    base_path: "{{ es_backup_storage }}/{{ snapshot_base_path }}_{{ base_path_date }}"
+    bucket: "{{ cloud_storage_esbackup_bucketname }}",
+    base_path: "{{ cloud_storage_esbackup_foldername }}/{{ snapshot_base_path }}_{{ base_path_date }}"
   }
 }
 
 # Override these values
 es_snapshot_host: "localhost"
 snapshot_base_path: "default"
-es_backup_storage: "elasticsearch-snapshots"
\ No newline at end of file
+
+cloud_storage_esbackup_bucketname: "{{ cloud_storage_management_bucketname }}"
+cloud_storage_esbackup_foldername: "elasticsearch-snapshots"

From 4a154e8841a42ae7c6cb2c1fc9285384521656f5 Mon Sep 17 00:00:00 2001
From: santhosh-tg
Date: Fri, 30 Dec 2022 11:24:21 +0530
Subject: [PATCH 031/203] Add gcp vars for service accounts

---
 ansible/artifacts-download.yml                                | 2 ++
 ansible/artifacts-upload.yml                                  | 2 ++
 ansible/roles/cassandra-backup/tasks/main.yml                 | 2 ++
 ansible/roles/influxdb_backup/tasks/main.yml                  | 2 ++
 ansible/roles/influxdb_restore/tasks/main.yml                 | 2 ++
 ansible/roles/postgres-managed-service-restore/tasks/main.yml | 2 ++
 ansible/roles/postgres-managed-service/tasks/main.yml         | 2 ++
 ansible/roles/postgresql-backup/tasks/main.yml                | 2 ++
 ansible/roles/postgresql-restore/tasks/main.yml               | 2 ++
 ansible/roles/redis-backup/tasks/main.yml                     | 2 ++
 ansible/roles/redis-multiprocess-backup/tasks/main.yml        | 2 ++
 ansible/roles/redis-multiprocess-restore/tasks/main.yml       | 2 ++
 ansible/roles/redis-restore/tasks/main.yml                    | 2 ++
 13 files changed, 26 insertions(+)

diff --git a/ansible/artifacts-download.yml b/ansible/artifacts-download.yml
index 9db0efb42f..fcf58787f5 100644
--- a/ansible/artifacts-download.yml
+++ b/ansible/artifacts-download.yml
@@ -21,6 +21,8 @@
         name: gcp-cloud-storage
         tasks_from: download.yml
       vars:
+        gcp_storage_service_account_name: "{{ cloud_artifact_storage_accountname }}"
+        gcp_storage_key_file: "{{ cloud_artifact_storage_secret }}"
         gcp_bucket_name: "{{ cloud_storage_artifacts_bucketname }}"
         gcp_path: "{{ artifact }}"
         local_file_or_folder_path: "{{ artifact_path }}"
diff --git a/ansible/artifacts-upload.yml b/ansible/artifacts-upload.yml
index 3bdbe73017..305492afc2 100644
--- a/ansible/artifacts-upload.yml
+++ b/ansible/artifacts-upload.yml
@@ -22,6 +22,8 @@
         name: gcp-cloud-storage
        tasks_from: upload.yml
       vars:
+        gcp_storage_service_account_name: "{{ cloud_artifact_storage_accountname }}"
+        gcp_storage_key_file: "{{ cloud_artifact_storage_secret }}"
         gcp_bucket_name: "{{ cloud_storage_artifacts_bucketname }}"
         gcp_path: "{{ artifact }}"
         local_file_or_folder_path: "{{ artifact_path }}"
diff --git a/ansible/roles/cassandra-backup/tasks/main.yml b/ansible/roles/cassandra-backup/tasks/main.yml
index f3cf8e916e..b4cc6dd8ed 100755
--- a/ansible/roles/cassandra-backup/tasks/main.yml
+++ b/ansible/roles/cassandra-backup/tasks/main.yml
@@ -56,6 +56,8 @@
     name: gcp-cloud-storage
     tasks_from: upload-batch.yml
   vars:
+    gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}"
+    gcp_storage_key_file: "{{ cloud_management_storage_secret }}"
     gcp_bucket_name: "{{ cloud_storage_dpcassandrabackup_bucketname }}"
     gcp_path: "{{ cloud_storage_dpcassandrabackup_foldername }}/{{ cassandra_backup_gzip_file_name}}"
     local_file_or_folder_path: "{{ cassandra_backup_gzip_file_path }}"
diff --git a/ansible/roles/influxdb_backup/tasks/main.yml b/ansible/roles/influxdb_backup/tasks/main.yml
index 11370325a0..2ddf64e950 100644
--- a/ansible/roles/influxdb_backup/tasks/main.yml
+++ b/ansible/roles/influxdb_backup/tasks/main.yml
@@ -43,6 +43,8 @@
     name: gcp-cloud-storage
     tasks_from: upload.yml
   vars:
+    gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}"
+    gcp_storage_key_file: "{{ cloud_management_storage_secret }}"
     gcp_bucket_name: "{{ cloud_storage_management_bucketname }}"
     gcp_path: "{{ cloud_storage_influxdbbackup_foldername }}/{{ influxdb_backup_file_name }}.zip"
     local_file_or_folder_path: "{{ influxdb_backup_dir }}/{{ influxdb_backup_file_name }}.zip"
diff --git a/ansible/roles/influxdb_restore/tasks/main.yml b/ansible/roles/influxdb_restore/tasks/main.yml
index 9c1f3cd436..7a2317caa0 100644
--- a/ansible/roles/influxdb_restore/tasks/main.yml
+++ b/ansible/roles/influxdb_restore/tasks/main.yml
@@ -37,6 +37,8 @@
     name: gcp-cloud-storage
     tasks_from: download.yml
   vars:
+    gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}"
+    gcp_storage_key_file: "{{ cloud_management_storage_secret }}"
     gcp_bucket_name: "{{ cloud_storage_influxdbbackup_bucketname }}"
     gcp_path: "{{ cloud_storage_influxdbbackup_foldername }}/{{ influxdb_restore_file_name }}"
     local_file_or_folder_path: "/tmp/{{ influxdb_restore_file_name }}"
diff --git a/ansible/roles/postgres-managed-service-restore/tasks/main.yml b/ansible/roles/postgres-managed-service-restore/tasks/main.yml
index db903740cd..c46e43e647 100644
--- a/ansible/roles/postgres-managed-service-restore/tasks/main.yml
+++ b/ansible/roles/postgres-managed-service-restore/tasks/main.yml
@@ -39,6 +39,8 @@
     name: gcp-cloud-storage
     tasks_from: download.yml
   vars:
+    gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}"
+    gcp_storage_key_file: "{{ cloud_management_storage_secret }}"
     gcp_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}"
     gcp_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgres_backup_filename }}"
     local_file_or_folder_path: "{{ postgres_restore_dir }}/{{ postgres_backup_filepath }}"
diff --git a/ansible/roles/postgres-managed-service/tasks/main.yml b/ansible/roles/postgres-managed-service/tasks/main.yml
index 1a8c69657f..c4189f2a08 100644
--- a/ansible/roles/postgres-managed-service/tasks/main.yml
+++ b/ansible/roles/postgres-managed-service/tasks/main.yml
@@ -59,6 +59,8 @@
     name: gcp-cloud-storage
     tasks_from: upload.yml
   vars:
+    gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}"
+    gcp_storage_key_file: "{{ cloud_management_storage_secret }}"
     gcp_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}"
     gcp_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgresql_backup_gzip_file_name }}.zip"
     local_file_or_folder_path: "{{ postgresql_backup_gzip_file_path }}"
diff --git a/ansible/roles/postgresql-backup/tasks/main.yml b/ansible/roles/postgresql-backup/tasks/main.yml
index ddff9d62bd..4be4bda8db 100755
--- a/ansible/roles/postgresql-backup/tasks/main.yml
+++ b/ansible/roles/postgresql-backup/tasks/main.yml
@@ -44,6 +44,8 @@
     name: gcp-cloud-storage
     tasks_from: upload.yml
   vars:
+    gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}"
+    gcp_storage_key_file: "{{ cloud_management_storage_secret }}"
     gcp_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}"
     gcp_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgresql_backup_gzip_file_name }}"
     local_file_or_folder_path: "{{ postgresql_backup_gzip_file_path }}"
diff --git a/ansible/roles/postgresql-restore/tasks/main.yml b/ansible/roles/postgresql-restore/tasks/main.yml
index 275061e9f2..7635f7e741 100755
--- a/ansible/roles/postgresql-restore/tasks/main.yml
+++ b/ansible/roles/postgresql-restore/tasks/main.yml
@@ -34,6 +34,8 @@
     name: gcp-cloud-storage
     tasks_from: download.yml
   vars:
+    gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}"
+    gcp_storage_key_file: "{{ cloud_management_storage_secret }}"
     gcp_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}"
     gcp_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgresql_restore_gzip_file_name }}"
     local_file_or_folder_path: "{{ postgresql_restore_gzip_file_path }}"
diff --git a/ansible/roles/redis-backup/tasks/main.yml b/ansible/roles/redis-backup/tasks/main.yml
index 061a3414ed..aafe94913c 100644
--- a/ansible/roles/redis-backup/tasks/main.yml
+++ b/ansible/roles/redis-backup/tasks/main.yml
@@ -49,6 +49,8 @@
     name: gcp-cloud-storage
     tasks_from: upload.yml
   vars:
+    gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}"
+    gcp_storage_key_file: "{{ cloud_management_storage_secret }}"
     gcp_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}"
     gcp_path: "{{ cloud_storage_dpredisbackup_foldername }}/{{ redis_backup_file_name }}"
     local_file_or_folder_path: "{{ redis_backup_file_path }}"
diff --git a/ansible/roles/redis-multiprocess-backup/tasks/main.yml b/ansible/roles/redis-multiprocess-backup/tasks/main.yml
index 9e44b18f05..7e31c94adb 100644
--- a/ansible/roles/redis-multiprocess-backup/tasks/main.yml
+++ b/ansible/roles/redis-multiprocess-backup/tasks/main.yml
@@ -50,6 +50,8 @@
     name: gcp-cloud-storage
     tasks_from: upload-batch.yml
   vars:
+    gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}"
+    gcp_storage_key_file: "{{ cloud_management_storage_secret }}"
     gcp_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}"
     gcp_path: "{{ cloud_storage_dpredisbackup_foldername }}"
     local_file_or_folder_path: "{{ redis_backup_dir }}/*"
index 7483f4ac15..87f1838a19 100644 --- a/ansible/roles/redis-multiprocess-restore/tasks/main.yml +++ b/ansible/roles/redis-multiprocess-restore/tasks/main.yml @@ -38,6 +38,8 @@ name: gcp-cloud-storage tasks_from: download.yml vars: + gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}" + gcp_storage_key_file: "{{ cloud_management_storage_secret }}" gcp_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}" gcp_path: "{{ cloud_storage_dpredisbackup_foldername }}/{{ item }}/{{ redis_restore_file_name }}" local_file_or_folder_path: "/tmp/{{ item }}/{{ redis_restore_file_name }}" diff --git a/ansible/roles/redis-restore/tasks/main.yml b/ansible/roles/redis-restore/tasks/main.yml index bdab9bd116..a695f11e62 100644 --- a/ansible/roles/redis-restore/tasks/main.yml +++ b/ansible/roles/redis-restore/tasks/main.yml @@ -30,6 +30,8 @@ name: gcp-cloud-storage tasks_from: download.yml vars: + gcp_storage_service_account_name: "{{ cloud_management_storage_accountname }}" + gcp_storage_key_file: "{{ cloud_management_storage_secret }}" gcp_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}" gcp_path: "{{ cloud_storage_dpredisbackup_foldername }}/{{ redis_restore_file_name }}" dest_file_name: "{{ redis_restore_file_name }}" From 4dbefc932fd61d708f9ecb4800ea08498c04632a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sat, 31 Dec 2022 08:14:04 +1100 Subject: [PATCH 032/203] testing secor changes for oci oss Signed-off-by: Deepak Devadathan --- .../secor/config/secor.common.properties | 12 ++++++------ .../secor/config/secor.partition.properties | 2 +- .../helm_charts/secor/config/secor.properties | 2 +- kubernetes/helm_charts/secor/values.j2 | 15 +++++++++++++++ 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/kubernetes/helm_charts/secor/config/secor.common.properties b/kubernetes/helm_charts/secor/config/secor.common.properties index 7050ebcf1b..fbe441c187 100644 --- a/kubernetes/helm_charts/secor/config/secor.common.properties +++ b/kubernetes/helm_charts/secor/config/secor.common.properties @@ -23,12 +23,12 @@ secor.kafka.topic_blacklist= # Choose what to fill according to the service you are using # in the choice option you can fill S3, GS, Swift or Azure -cloud.service=Azure +cloud.service={{ $.Values.storage_type }} # AWS authentication credentials. # Leave empty if using IAM role-based authentication with s3a filesystem. -aws.access.key= -aws.secret.key= +aws.access.key={{ $.Values.s3_access_key }} +aws.secret.key={{ $.Values.s3_secret_id }} aws.role= # Optional Proxy Setting. Set to true to enable proxy @@ -51,12 +51,12 @@ aws.proxy.http.port= # secor.upload.manager.class. # # http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region -aws.region= -aws.endpoint= +aws.region={{ $.Values.s3_region }} +aws.endpoint={{ $.Values.s3_endpoint }} # Toggle the AWS S3 client between virtual host style access and path style # access. 
See http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html -aws.client.pathstyleaccess=false +aws.client.pathstyleaccess={{ $.Values.s3_path_style_access }} ########################### # START AWS S3 ENCRYPTION # diff --git a/kubernetes/helm_charts/secor/config/secor.partition.properties b/kubernetes/helm_charts/secor/config/secor.partition.properties index 743e1bab86..0bee7818ea 100644 --- a/kubernetes/helm_charts/secor/config/secor.partition.properties +++ b/kubernetes/helm_charts/secor/config/secor.partition.properties @@ -23,7 +23,7 @@ secor.kafka.group={{ get (get $.Values.secor_jobs $.Release.Name) "consumer_grou secor.message.parser.class={{ get (get $.Values.secor_jobs $.Release.Name) "message_parser" }} # S3 path where sequence files are stored. -secor.s3.path= +secor.s3.path={{- get (get $.Values.secor_jobs $.Release.Name) "base_path" }} # Swift path where sequence files are stored. secor.swift.path=secor_dev/partition diff --git a/kubernetes/helm_charts/secor/config/secor.properties b/kubernetes/helm_charts/secor/config/secor.properties index 6f2876d1de..4a724a051a 100644 --- a/kubernetes/helm_charts/secor/config/secor.properties +++ b/kubernetes/helm_charts/secor/config/secor.properties @@ -10,7 +10,7 @@ include=secor.common.properties ############### # Name of the s3 bucket where log files are stored. -secor.s3.bucket= +secor.s3.bucket={{ $.Values.s3_bucket_name }} ############### # Using Swift # diff --git a/kubernetes/helm_charts/secor/values.j2 b/kubernetes/helm_charts/secor/values.j2 index 4aa2e0ee83..d09a05b9d6 100644 --- a/kubernetes/helm_charts/secor/values.j2 +++ b/kubernetes/helm_charts/secor/values.j2 @@ -2,6 +2,21 @@ azure_account: "{{ sunbird_private_storage_account_name }}" azure_secret: "{{ sunbird_private_storage_account_key }}" azure_container_name: "telemetry-data-store" +s3_access_key: "{{s3_storage_key}}" +s3_secret_id: "{{s3_storage_secret}}" +s3_region: "{{oci_region}}" +s3_endpoint: "{{s3_storage_endpoint}}" +s3_path_style_access: "{{s3_path_style_access}}" +s3_bucket_name: "telemetry-data-store" + +{% if cloud_service_provider == 'oci' -%} +storage_type: "S3" +{%- else -%} +storage_type: "Azure" +{%- endif %} + + + namespace: {{ secor_namespace }} storageClass: {{ secor_storage_class | default('default') }} imagepullsecrets: {{ imagepullsecrets }} From 7b7854d82a0deea484d5f13b5a650deb166c6e02 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sat, 31 Dec 2022 08:22:55 +1100 Subject: [PATCH 033/203] added oci-bv as the storage class Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/secor/values.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/kubernetes/helm_charts/secor/values.j2 b/kubernetes/helm_charts/secor/values.j2 index d09a05b9d6..62610ee5fe 100644 --- a/kubernetes/helm_charts/secor/values.j2 +++ b/kubernetes/helm_charts/secor/values.j2 @@ -11,6 +11,7 @@ s3_bucket_name: "telemetry-data-store" {% if cloud_service_provider == 'oci' -%} storage_type: "S3" +secor_storage_class: "oci-bv" {%- else -%} storage_type: "Azure" {%- endif %} From 8fd5084dd067d877f2333e8747b397ad9c9460a4 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sat, 31 Dec 2022 08:32:10 +1100 Subject: [PATCH 034/203] added storageclass selection Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/secor/values.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/secor/values.j2 b/kubernetes/helm_charts/secor/values.j2 index 62610ee5fe..183bce6c8e 100644 --- a/kubernetes/helm_charts/secor/values.j2 +++ 
b/kubernetes/helm_charts/secor/values.j2 @@ -11,15 +11,15 @@ s3_bucket_name: "telemetry-data-store" {% if cloud_service_provider == 'oci' -%} storage_type: "S3" -secor_storage_class: "oci-bv" +storageClass: "oci-bv" {%- else -%} storage_type: "Azure" +storageClass: {{ secor_storage_class | default('default') }} {%- endif %} namespace: {{ secor_namespace }} -storageClass: {{ secor_storage_class | default('default') }} imagepullsecrets: {{ imagepullsecrets }} secor_jobs: From 6a4f5513d4e6279d4bc795eaad120ee7b1647aba Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sat, 31 Dec 2022 08:41:05 +1100 Subject: [PATCH 035/203] added a condition to include secor.azure.properties Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/secor/config/secor.partition.properties | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kubernetes/helm_charts/secor/config/secor.partition.properties b/kubernetes/helm_charts/secor/config/secor.partition.properties index 0bee7818ea..cbcc742081 100644 --- a/kubernetes/helm_charts/secor/config/secor.partition.properties +++ b/kubernetes/helm_charts/secor/config/secor.partition.properties @@ -14,7 +14,9 @@ # limitations under the License. include=secor.properties +{{- if eq .Values.storage_type "Azure" }} include=secor.azure.properties +{{- end }} # Name of the Kafka consumer group. secor.kafka.group={{ get (get $.Values.secor_jobs $.Release.Name) "consumer_group" }} From ea1e6659ae5f6c026d4afac58c90016df0aad638 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sat, 31 Dec 2022 08:53:46 +1100 Subject: [PATCH 036/203] using S3UploadManager in common.properties Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/secor/config/secor.common.properties | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/secor/config/secor.common.properties b/kubernetes/helm_charts/secor/config/secor.common.properties index fbe441c187..ebe2bb7d26 100644 --- a/kubernetes/helm_charts/secor/config/secor.common.properties +++ b/kubernetes/helm_charts/secor/config/secor.common.properties @@ -357,7 +357,8 @@ secor.max.message.size.bytes=100000 # Class that will manage uploads. Default is to use the hadoop # interface to S3. 
-secor.upload.manager.class=com.pinterest.secor.uploader.AzureUploadManager +# secor.upload.manager.class=com.pinterest.secor.uploader.AzureUploadManager +secor.upload.manager.class=com.pinterest.secor.uploader.S3UploadManager #Set below property to your timezone, and the events will be parsed and converted to the timezone specified secor.message.timezone=UTC From b80f4d0e7e8f73cdf8f679ce1a33af391a79a823 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 08:31:41 +1100 Subject: [PATCH 037/203] update flink-conf for telemetry-extractor Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 22630c3015..49c712f120 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -286,6 +286,11 @@ telemetry-extractor: heartbeat.interval: 5000 taskmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].taskmanager_process_memory }} jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} + fs.s3.access.key: {{s3_storage_key}} + fs.s3.secret.key: {{s3_storage_secret}} + fs.s3.endpoint: {{s3_storage_endpoint}} + fs.s3.path.style.access: {{s3_path_style_access}} + pipeline-preprocessor: pipeline-preprocessor: |+ From e46a8188276c6ce22b262398ca7ab65acc13c52e Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 08:36:16 +1100 Subject: [PATCH 038/203] hardcoding base.url Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 49c712f120..f2ff94c244 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -158,7 +158,8 @@ base_config: | {% if checkpoint_store_type == "azure" %} base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} - base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} + #base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} + base.url = s3://dev-data-store/checkpoint {% endif %} } } From 0ec7478e83a98668a20415efc982a4c60d2b210c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 08:38:29 +1100 Subject: [PATCH 039/203] hardcode base.url with double quotes Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index f2ff94c244..ed51046a59 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -159,7 +159,7 @@ base_config: | base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} #base.url = 
"s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = s3://dev-data-store/checkpoint + base.url = "s3://dev-data-store/checkpoint" {% endif %} } } From 61c4c1707ac63d8cc92655a095f3613d4f8d1c02 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 08:46:45 +1100 Subject: [PATCH 040/203] updated flink-conf for telemetry extractor Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index ed51046a59..cba8d4e92f 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -291,6 +291,10 @@ telemetry-extractor: fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} + s3.access-key: {{s3_storage_key}} + s3.secret-key: {{s3_storage_secret}} + s3.endpoint: {{s3_storage_endpoint}} + s3.path.style.access: {{s3_path_style_access}} pipeline-preprocessor: From b9ddecd28a76a3d0c06e4dd98ae1af3a2f90dd51 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 08:53:46 +1100 Subject: [PATCH 041/203] removed changes from flink-conf Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index cba8d4e92f..8a0476c32b 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -291,10 +291,8 @@ telemetry-extractor: fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} - s3.access-key: {{s3_storage_key}} - s3.secret-key: {{s3_storage_secret}} - s3.endpoint: {{s3_storage_endpoint}} - s3.path.style.access: {{s3_path_style_access}} + + pipeline-preprocessor: From e83136b6342940051fbbbc971e7c8712a296a2ac Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 08:59:49 +1100 Subject: [PATCH 042/203] temporary change Signed-off-by: Deepak Devadathan --- .../templates/flink_job_deployment.yaml | 6 - .../flink_job_deployment.yaml.disabled | 245 ++++++++++++++++++ .../helm_charts/datapipeline_jobs/values.j2 | 5 +- 3 files changed, 249 insertions(+), 7 deletions(-) create mode 100644 kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index 10e6b62181..c794c7e702 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -112,12 +112,6 @@ spec: "--job-classname={{ .Values.job_classname }}", {{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", -{{- end }} -{{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", - "-Ds3.path.style.access={{ 
.Values.s3_path_style_access }}", {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled new file mode 100644 index 0000000000..10e6b62181 --- /dev/null +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled @@ -0,0 +1,245 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-jobmanager + namespace: {{ .Values.namespace }} + labels: + app: flink + component: {{ .Release.Name }}-jobmanager +spec: + type: ClusterIP + ports: + - name: rpc + port: {{ .Values.jobmanager.rpc_port }} + - name: blob + port: {{ .Values.jobmanager.blob_port }} + - name: query + port: {{ .Values.jobmanager.query_port }} + - name: ui + port: {{ .Values.jobmanager.ui_port }} + - name: prom + port: {{ .Values.jobmanager.prom_port }} + selector: + app: flink + component: {{ .Release.Name }}-jobmanager + +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-jobmanager-webui + namespace: {{ .Values.namespace }} +{{- if .Values.service.annotations }} +{{- with .Values.service.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +{{- end }} +spec: + {{- if eq .Values.service.type "ClusterIP" }} + type: ClusterIP + {{- end }} + {{- if eq .Values.service.type "LoadBalancer" }} + type: LoadBalancer + {{- end }} + ports: + - name: rest + port: {{ .Values.rest_port }} + protocol: TCP + targetPort: {{ .Values.resttcp_port }} + selector: + app: flink + component: {{ .Release.Name }}-jobmanager + +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-taskmanager-prometheus + namespace: {{ .Values.namespace }} + labels: + app: flink + component: {{ .Release.Name }}-taskmanager +spec: + type: ClusterIP + ports: + - name: prom + port: {{ .Values.taskmanager.prom_port }} + selector: + app: flink + component: {{ .Release.Name }}-taskmanager + +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Release.Name }}-jobmanager + namespace: {{ .Values.namespace }} +spec: + template: + metadata: + labels: + app: flink + component: {{ .Release.Name }}-jobmanager + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: "{{ .Values.jobmanager.prom_port }}" + spec: + volumes: + - name: flink-config-volume + configMap: + name: {{ .Release.Name }}-config + items: + - key: flink-conf + path: flink-conf.yaml + - key: base-config + path: base-config.conf + - key: {{ .Release.Name }} + path: {{ .Release.Name }}.conf + - key: log4j_console_properties + path: log4j-console.properties + restartPolicy: OnFailure + imagePullSecrets: + - name: {{ .Values.imagepullsecrets }} + containers: + - name: {{ .Release.Name }}-jobmanager + image: "{{ .Values.dockerhub }}/{{ .Values.repository }}:{{ .Values.image_tag }}" + imagePullPolicy: Always + workingDir: /opt/flink + command: ["/opt/flink/bin/standalone-job.sh"] + args: ["start-foreground", + "--job-classname={{ .Values.job_classname }}", +{{- if eq .Values.checkpoint_store_type "azure" }} + "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "s3" }} + "-Ds3.access-key={{ .Values.s3_access_key }}", + "-Ds3.secret-key={{ .Values.s3_secret_key }}", + "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.path.style.access={{ 
.Values.s3_path_style_access }}", +{{- end }} + "-Dweb.submit.enable=false", + "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", + "-Dmetrics.reporter.prom.port={{ .Values.jobmanager.prom_port }}", + "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", + "-Djobmanager.rpc.port={{ .Values.jobmanager.rpc_port }}", + "-Dparallelism.default=1", + "-Dblob.server.port={{ .Values.jobmanager.blob_port }}", + "-Dqueryable-state.server.ports={{ .Values.jobmanager.query_port }}", + "--config.file.path", + "/data/flink/conf/{{ .Release.Name }}.conf"] + ports: + - containerPort: {{ .Values.jobmanager.rpc_port }} + name: rpc + - containerPort: {{ .Values.jobmanager.blob_port }} + name: blob + - containerPort: {{ .Values.jobmanager.query_port }} + name: query + - containerPort: {{ .Values.jobmanager.ui_port }} + name: ui + volumeMounts: + - name: flink-config-volume + mountPath: /opt/flink/conf/flink-conf.yaml + subPath: flink-conf.yaml + - name: flink-config-volume + mountPath: /data/flink/conf/base-config.conf + subPath: base-config.conf + - name: flink-config-volume + mountPath: /data/flink/conf/{{ .Release.Name }}.conf + subPath: {{ .Release.Name }}.conf + - name: flink-config-volume + mountPath: /opt/flink/conf/log4j-console.properties + subPath: log4j-console.properties + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-taskmanager + namespace: {{ .Values.namespace }} +spec: + replicas: {{ .Values.taskmanager.replicas }} + selector: + matchLabels: + app: flink + component: {{ .Release.Name }}-taskmanager + template: + metadata: + labels: + app: flink + component: {{ .Release.Name }}-taskmanager + spec: + volumes: + - name: flink-config-volume + configMap: + name: {{ .Release.Name }}-config + items: + - key: flink-conf + path: flink-conf.yaml + - key: log4j_console_properties + path: log4j-console.properties + imagePullSecrets: + - name: {{ .Values.imagepullsecrets }} + containers: + - name: {{ .Release.Name }}-taskmanager + image: "{{ .Values.dockerhub }}/{{ .Values.repository }}:{{ .Values.image_tag }}" + imagePullPolicy: Always + resources: + requests: + cpu: "{{ .Values.taskmanager.cpu_requests }}" + workingDir: {{ .Values.taskmanager.flink_work_dir }} + command: ["/opt/flink/bin/taskmanager.sh"] + args: ["start-foreground", +{{- if eq .Values.checkpoint_store_type "azure" }} + "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "s3" }} + "-Ds3.access-key={{ .Values.s3_access_key }}", + "-Ds3.secret-key={{ .Values.s3_secret_key }}", + "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end }} + "-Dweb.submit.enable=false", + "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", + "-Dmetrics.reporter.prom.host={{ .Release.Name }}-taskmanager", + "-Dmetrics.reporter.prom.port=9251-9260", + "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", + "-Dtaskmanager.rpc.port={{ .Values.taskmanager.rpc_port }}"] + ports: + - containerPort: {{ .Values.taskmanager.rpc_port }} + name: rpc + {{- if .Values.healthcheck }} + livenessProbe: +{{ toYaml .Values.livenessProbe | indent 10 }} + {{- end }} + volumeMounts: + - name: flink-config-volume + mountPath: /opt/flink/conf/flink-conf.yaml + subPath: flink-conf.yaml + - name: flink-config-volume + mountPath: /opt/flink/conf/log4j-console.properties + subPath: log4j-console.properties + 
+{{- $name := .Release.Name }} +{{- $prop := (index .Values.scale_properties $name)}} +{{- if $prop.enabled}} +--- +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ .Release.Name }}-taskmanager-hpa + namespace: {{ .Values.namespace }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ .Release.Name }}-taskmanager + minReplicas: {{ $prop.min_replica }} + maxReplicas: {{ $prop.max_replica }} + metrics: + - type: External + external: + metricName: {{ .Release.Name }}_kafka_consumergroup_lag_sum + targetValue: "{{ $prop.scale_target_value }}" +{{- end }} \ No newline at end of file diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 8a0476c32b..7b1dd3b833 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -291,7 +291,10 @@ telemetry-extractor: fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} - + s3.access-key: {{s3_storage_key}} + s3.secret-key: {{s3_storage_secret}} + s3.endpoint: {{s3_storage_endpoint}} + s3.path.style.access: {{s3_path_style_access}} From 5ced03952f8c8bc8a25a66f2e3f2412bbb32db13 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 09:01:55 +1100 Subject: [PATCH 043/203] temporary change Signed-off-by: Deepak Devadathan --- .../flink_job_deployment.yaml.disabled | 245 ------------------ 1 file changed, 245 deletions(-) delete mode 100644 kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled deleted file mode 100644 index 10e6b62181..0000000000 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml.disabled +++ /dev/null @@ -1,245 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Release.Name }}-jobmanager - namespace: {{ .Values.namespace }} - labels: - app: flink - component: {{ .Release.Name }}-jobmanager -spec: - type: ClusterIP - ports: - - name: rpc - port: {{ .Values.jobmanager.rpc_port }} - - name: blob - port: {{ .Values.jobmanager.blob_port }} - - name: query - port: {{ .Values.jobmanager.query_port }} - - name: ui - port: {{ .Values.jobmanager.ui_port }} - - name: prom - port: {{ .Values.jobmanager.prom_port }} - selector: - app: flink - component: {{ .Release.Name }}-jobmanager - ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Release.Name }}-jobmanager-webui - namespace: {{ .Values.namespace }} -{{- if .Values.service.annotations }} -{{- with .Values.service.annotations }} - annotations: -{{ toYaml . 
| indent 4 }} -{{- end }} -{{- end }} -spec: - {{- if eq .Values.service.type "ClusterIP" }} - type: ClusterIP - {{- end }} - {{- if eq .Values.service.type "LoadBalancer" }} - type: LoadBalancer - {{- end }} - ports: - - name: rest - port: {{ .Values.rest_port }} - protocol: TCP - targetPort: {{ .Values.resttcp_port }} - selector: - app: flink - component: {{ .Release.Name }}-jobmanager - ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ .Release.Name }}-taskmanager-prometheus - namespace: {{ .Values.namespace }} - labels: - app: flink - component: {{ .Release.Name }}-taskmanager -spec: - type: ClusterIP - ports: - - name: prom - port: {{ .Values.taskmanager.prom_port }} - selector: - app: flink - component: {{ .Release.Name }}-taskmanager - ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ .Release.Name }}-jobmanager - namespace: {{ .Values.namespace }} -spec: - template: - metadata: - labels: - app: flink - component: {{ .Release.Name }}-jobmanager - annotations: - prometheus.io/scrape: 'true' - prometheus.io/port: "{{ .Values.jobmanager.prom_port }}" - spec: - volumes: - - name: flink-config-volume - configMap: - name: {{ .Release.Name }}-config - items: - - key: flink-conf - path: flink-conf.yaml - - key: base-config - path: base-config.conf - - key: {{ .Release.Name }} - path: {{ .Release.Name }}.conf - - key: log4j_console_properties - path: log4j-console.properties - restartPolicy: OnFailure - imagePullSecrets: - - name: {{ .Values.imagepullsecrets }} - containers: - - name: {{ .Release.Name }}-jobmanager - image: "{{ .Values.dockerhub }}/{{ .Values.repository }}:{{ .Values.image_tag }}" - imagePullPolicy: Always - workingDir: /opt/flink - command: ["/opt/flink/bin/standalone-job.sh"] - args: ["start-foreground", - "--job-classname={{ .Values.job_classname }}", -{{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", -{{- end }} -{{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", - "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", -{{- end }} - "-Dweb.submit.enable=false", - "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", - "-Dmetrics.reporter.prom.port={{ .Values.jobmanager.prom_port }}", - "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", - "-Djobmanager.rpc.port={{ .Values.jobmanager.rpc_port }}", - "-Dparallelism.default=1", - "-Dblob.server.port={{ .Values.jobmanager.blob_port }}", - "-Dqueryable-state.server.ports={{ .Values.jobmanager.query_port }}", - "--config.file.path", - "/data/flink/conf/{{ .Release.Name }}.conf"] - ports: - - containerPort: {{ .Values.jobmanager.rpc_port }} - name: rpc - - containerPort: {{ .Values.jobmanager.blob_port }} - name: blob - - containerPort: {{ .Values.jobmanager.query_port }} - name: query - - containerPort: {{ .Values.jobmanager.ui_port }} - name: ui - volumeMounts: - - name: flink-config-volume - mountPath: /opt/flink/conf/flink-conf.yaml - subPath: flink-conf.yaml - - name: flink-config-volume - mountPath: /data/flink/conf/base-config.conf - subPath: base-config.conf - - name: flink-config-volume - mountPath: /data/flink/conf/{{ .Release.Name }}.conf - subPath: {{ .Release.Name }}.conf - - name: flink-config-volume - mountPath: /opt/flink/conf/log4j-console.properties - subPath: log4j-console.properties - ---- -apiVersion: 
apps/v1 -kind: Deployment -metadata: - name: {{ .Release.Name }}-taskmanager - namespace: {{ .Values.namespace }} -spec: - replicas: {{ .Values.taskmanager.replicas }} - selector: - matchLabels: - app: flink - component: {{ .Release.Name }}-taskmanager - template: - metadata: - labels: - app: flink - component: {{ .Release.Name }}-taskmanager - spec: - volumes: - - name: flink-config-volume - configMap: - name: {{ .Release.Name }}-config - items: - - key: flink-conf - path: flink-conf.yaml - - key: log4j_console_properties - path: log4j-console.properties - imagePullSecrets: - - name: {{ .Values.imagepullsecrets }} - containers: - - name: {{ .Release.Name }}-taskmanager - image: "{{ .Values.dockerhub }}/{{ .Values.repository }}:{{ .Values.image_tag }}" - imagePullPolicy: Always - resources: - requests: - cpu: "{{ .Values.taskmanager.cpu_requests }}" - workingDir: {{ .Values.taskmanager.flink_work_dir }} - command: ["/opt/flink/bin/taskmanager.sh"] - args: ["start-foreground", -{{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", -{{- end }} -{{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", - "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", -{{- end }} - "-Dweb.submit.enable=false", - "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", - "-Dmetrics.reporter.prom.host={{ .Release.Name }}-taskmanager", - "-Dmetrics.reporter.prom.port=9251-9260", - "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", - "-Dtaskmanager.rpc.port={{ .Values.taskmanager.rpc_port }}"] - ports: - - containerPort: {{ .Values.taskmanager.rpc_port }} - name: rpc - {{- if .Values.healthcheck }} - livenessProbe: -{{ toYaml .Values.livenessProbe | indent 10 }} - {{- end }} - volumeMounts: - - name: flink-config-volume - mountPath: /opt/flink/conf/flink-conf.yaml - subPath: flink-conf.yaml - - name: flink-config-volume - mountPath: /opt/flink/conf/log4j-console.properties - subPath: log4j-console.properties - -{{- $name := .Release.Name }} -{{- $prop := (index .Values.scale_properties $name)}} -{{- if $prop.enabled}} ---- -apiVersion: autoscaling/v2beta1 -kind: HorizontalPodAutoscaler -metadata: - name: {{ .Release.Name }}-taskmanager-hpa - namespace: {{ .Values.namespace }} -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: {{ .Release.Name }}-taskmanager - minReplicas: {{ $prop.min_replica }} - maxReplicas: {{ $prop.max_replica }} - metrics: - - type: External - external: - metricName: {{ .Release.Name }}_kafka_consumergroup_lag_sum - targetValue: "{{ $prop.scale_target_value }}" -{{- end }} \ No newline at end of file From 426036fbd3d3218d8862767cd4749af7e77aaa75 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 09:03:22 +1100 Subject: [PATCH 044/203] temporary change Signed-off-by: Deepak Devadathan --- .../datapipeline_jobs/templates/flink_job_deployment.yaml | 6 ++++++ kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index c794c7e702..10e6b62181 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ 
b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -112,6 +112,12 @@ spec: "--job-classname={{ .Values.job_classname }}", {{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "s3" }} + "-Ds3.access-key={{ .Values.s3_access_key }}", + "-Ds3.secret-key={{ .Values.s3_secret_key }}", + "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 7b1dd3b833..d2a82456f3 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -291,10 +291,6 @@ telemetry-extractor: fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} - s3.access-key: {{s3_storage_key}} - s3.secret-key: {{s3_storage_secret}} - s3.endpoint: {{s3_storage_endpoint}} - s3.path.style.access: {{s3_path_style_access}} From 3ffd0e4ce11de5d2b3bbb023cd4e20eec53c63aa Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 09:59:59 +1100 Subject: [PATCH 045/203] added hard-coded value for region Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index d2a82456f3..1cb6466932 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,9 @@ telemetry-extractor: jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} + fs.s3.endpoint.region: ap-hyderabad-1 fs.s3.path.style.access: {{s3_path_style_access}} + From 9a8e39f71202e3ed50079e2eb3fbe72be07df4e4 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:02:22 +1100 Subject: [PATCH 046/203] removed the hard-coded region for flink Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 1 - 1 file changed, 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 1cb6466932..b46c2b2d81 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,6 @@ telemetry-extractor: fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} - fs.s3.endpoint.region: ap-hyderabad-1 fs.s3.path.style.access: {{s3_path_style_access}} From 6d0fb196de38b08fdca2e46b79b56b1e1e1f0c34 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:03:37 +1100 Subject: [PATCH 047/203] added a trailing / Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index b46c2b2d81..ab46f36a65 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++
b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -159,7 +159,7 @@ base_config: | base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} #base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint" + base.url = "s3://dev-data-store/checkpoint/" {% endif %} } } From 4f6a9b07f1faed62691762dd8ba4219d7c6d7b98 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:05:58 +1100 Subject: [PATCH 048/203] removed trailing slash Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index ab46f36a65..b46c2b2d81 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -159,7 +159,7 @@ base_config: | base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} #base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint/" + base.url = "s3://dev-data-store/checkpoint" {% endif %} } } From 7c2482838f58843d4ff145598c809c978b9cb60d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:06:41 +1100 Subject: [PATCH 049/203] changed base.url for s3 Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index b46c2b2d81..e4086dbde0 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -158,8 +158,8 @@ base_config: | {% if checkpoint_store_type == "azure" %} base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} - #base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint" + base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} + #base.url = "s3://dev-data-store/checkpoint" {% endif %} } } From d149abc49e89944531d8ccc16bec89b599d188e6 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:09:23 +1100 Subject: [PATCH 050/203] hardcode base.url Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index e4086dbde0..ed97213052 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -158,8 +158,8 @@ 
base_config: | {% if checkpoint_store_type == "azure" %} base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} - base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - #base.url = "s3://dev-data-store/checkpoint" + # base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} + base.url = "s3://dev-data-store/checkpoint" {% endif %} } } From 3b55ac1871cb84787d93c6d552cfd63f475e7718 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 10:52:49 +1100 Subject: [PATCH 051/203] debug level only for hadoop Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index ed97213052..7cfbf88ef3 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -117,7 +117,8 @@ log4j_console_properties: | logger.kafka.name= org.apache.kafka logger.kafka.level = {{ flink_libraries_log_level | default(INFO) }} logger.hadoop.name = org.apache.hadoop - logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} + # logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} + logger.hadoop.level = {{ flink_hadoop_log_level | default(INFO) }} logger.zookeeper.name = org.apache.zookeeper logger.zookeeper.level = {{ flink_libraries_log_level | default(INFO) }} From ac6d377a9a37e043f127c014e41615c2a75aef5c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 11:47:33 +1100 Subject: [PATCH 052/203] using s3a Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 7cfbf88ef3..f301b0651a 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -160,7 +160,7 @@ base_config: | base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} # base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint" + base.url = "s3a://dev-data-store/checkpoint" {% endif %} } } From 60a50c3ebd13813d21dccb869dbd4c3b610d0dfc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 11:49:53 +1100 Subject: [PATCH 053/203] using s3 url Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index f301b0651a..7cfbf88ef3 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -160,7 +160,7 @@ base_config: | base.url = 
"wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} # base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3a://dev-data-store/checkpoint" + base.url = "s3://dev-data-store/checkpoint" {% endif %} } } From 033b16074b2ed541cdd8a177d20e08ef1143f407 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 13:07:06 +1100 Subject: [PATCH 054/203] removed hadoop logging Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 7cfbf88ef3..e9df6b5fcb 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -117,8 +117,8 @@ log4j_console_properties: | logger.kafka.name= org.apache.kafka logger.kafka.level = {{ flink_libraries_log_level | default(INFO) }} logger.hadoop.name = org.apache.hadoop - # logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} - logger.hadoop.level = {{ flink_hadoop_log_level | default(INFO) }} + logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} + # logger.hadoop.level = {{ flink_hadoop_log_level | default(INFO) }} logger.zookeeper.name = org.apache.zookeeper logger.zookeeper.level = {{ flink_libraries_log_level | default(INFO) }} From 2a5050f1a8e456525e2a4f6a5bda453da1c7b9f3 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:05:43 +1100 Subject: [PATCH 055/203] testing with sse-c values Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index e9df6b5fcb..69e4c8a170 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -292,8 +292,8 @@ telemetry-extractor: fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} - - + fs.s3.server-side-encryption-algorithm: SSE-C + fs.s3a.server-side-encryption.key: SGVscCwgSSdtIHRyYXBwZWQgaW5zaWRlIGEgYmFzZS02NC1jb2RlYyE= pipeline-preprocessor: From 58dec826c1e3829137d2fc2e9b643401b8383aa4 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:08:06 +1100 Subject: [PATCH 056/203] corrected the typo Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 69e4c8a170..12e430c7ca 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -293,7 +293,7 @@ telemetry-extractor: fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} fs.s3.server-side-encryption-algorithm: SSE-C - fs.s3a.server-side-encryption.key: SGVscCwgSSdtIHRyYXBwZWQgaW5zaWRlIGEgYmFzZS02NC1jb2RlYyE= + fs.s3.server-side-encryption.key: SGVscCwgSSdtIHRyYXBwZWQgaW5zaWRlIGEgYmFzZS02NC1jb2RlYyE= pipeline-preprocessor: 
From f53d043c2c1906edc7866f5c7e183d3237e1be30 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:37:09 +1100 Subject: [PATCH 057/203] hardcoded endpoint Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 12e430c7ca..d87633105a 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,10 +290,8 @@ telemetry-extractor: jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} - fs.s3.endpoint: {{s3_storage_endpoint}} + fs.s3.endpoint: https://apaccpt03.compat.objectstorage.ap-hyderabad-1.oraclecloud.com fs.s3.path.style.access: {{s3_path_style_access}} - fs.s3.server-side-encryption-algorithm: SSE-C - fs.s3.server-side-encryption.key: SGVscCwgSSdtIHRyYXBwZWQgaW5zaWRlIGEgYmFzZS02NC1jb2RlYyE= pipeline-preprocessor: From 898177c7f47218b86fc496fdfc28c1a6b510760a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:37:35 +1100 Subject: [PATCH 058/203] hard coded endpoint url Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index d87633105a..799a10d454 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,7 @@ telemetry-extractor: jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} - fs.s3.endpoint: https://apaccpt03.compat.objectstorage.ap-hyderabad-1.oraclecloud.com + fs.s3.endpoint: 'https://apaccpt03.compat.objectstorage.ap-hyderabad-1.oraclecloud.com' fs.s3.path.style.access: {{s3_path_style_access}} From 8a64d3e8037282ec10cd84a9932951cb99597171 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 14:39:37 +1100 Subject: [PATCH 059/203] endpoint as variable Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 799a10d454..cb20557cd9 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,7 @@ telemetry-extractor: jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} - fs.s3.endpoint: 'https://apaccpt03.compat.objectstorage.ap-hyderabad-1.oraclecloud.com' + fs.s3.endpoint: {{s3_storage_endpoint}} fs.s3.path.style.access: {{s3_path_style_access}} From f451d4554cf5af00cee1518a9b2c603b75b51bb9 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 15:32:51 +1100 Subject: [PATCH 060/203] removed hadoop specific logging Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 1 - 1 file changed, 1 deletion(-) diff --git 
a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index cb20557cd9..efcc3cdda9 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -118,7 +118,6 @@ log4j_console_properties: | logger.kafka.level = {{ flink_libraries_log_level | default(INFO) }} logger.hadoop.name = org.apache.hadoop logger.hadoop.level = {{ flink_libraries_log_level | default(INFO) }} - # logger.hadoop.level = {{ flink_hadoop_log_level | default(INFO) }} logger.zookeeper.name = org.apache.zookeeper logger.zookeeper.level = {{ flink_libraries_log_level | default(INFO) }} From 0a4380a36cdff95cd3bf3b859ddd7fed7faf2be5 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 15:53:22 +1100 Subject: [PATCH 061/203] added explicit region Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index efcc3cdda9..951b9772ba 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,6 +290,7 @@ telemetry-extractor: fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} + fs.s3.endpoint.region: {{s3_region}} fs.s3.path.style.access: {{s3_path_style_access}} From 06a5bad926cc1fadb551468aeaf852869ed48ffc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 16:04:06 +1100 Subject: [PATCH 062/203] removed region flag Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 1 - 1 file changed, 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 951b9772ba..efcc3cdda9 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -290,7 +290,6 @@ telemetry-extractor: fs.s3.access.key: {{s3_storage_key}} fs.s3.secret.key: {{s3_storage_secret}} fs.s3.endpoint: {{s3_storage_endpoint}} - fs.s3.endpoint.region: {{s3_region}} fs.s3.path.style.access: {{s3_path_style_access}} From bb5fa355d4cb2db41b4a1ee359950c6363337b4a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 16:21:17 +1100 Subject: [PATCH 063/203] changed oci-specific endpoint for s3 Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index efcc3cdda9..2f413910df 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++
"s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} - base.url = "s3://dev-data-store/checkpoint" + base.url = "s3://"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} {% endif %} } } From 0394d1be01798ade853e105ecbf24b4d90fb09d2 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 2 Jan 2023 16:27:35 +1100 Subject: [PATCH 064/203] removed customization from flink-conf.yaml Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 2f413910df..cbafb57c22 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -242,7 +242,6 @@ ingest-router: taskmanager.memory.process.size: {{ flink_job_names['ingest-router'].taskmanager_process_memory }} jobmanager.memory.process.size: {{ flink_job_names['ingest-router'].jobmanager_process_memory }} - telemetry-extractor: telemetry-extractor: |+ include file("/data/flink/conf/base-config.conf") @@ -292,10 +291,6 @@ telemetry-extractor: heartbeat.interval: 5000 taskmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].taskmanager_process_memory }} jobmanager.memory.process.size: {{ flink_job_names['telemetry-extractor'].jobmanager_process_memory }} - fs.s3.access.key: {{s3_storage_key}} - fs.s3.secret.key: {{s3_storage_secret}} - fs.s3.endpoint: {{s3_storage_endpoint}} - fs.s3.path.style.access: {{s3_path_style_access}} pipeline-preprocessor: From fb8f586f13e980b3d09e9799c2415232da1f5aee Mon Sep 17 00:00:00 2001 From: Anand Parthasarathy Date: Fri, 6 Jan 2023 15:58:55 +0530 Subject: [PATCH 065/203] Issue #000: Fix ruby version for spark provisioning script --- ansible/roles/analytics-spark-provision/tasks/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/analytics-spark-provision/tasks/main.yml b/ansible/roles/analytics-spark-provision/tasks/main.yml index 01eef6c9e1..25ebd9da23 100644 --- a/ansible/roles/analytics-spark-provision/tasks/main.yml +++ b/ansible/roles/analytics-spark-provision/tasks/main.yml @@ -120,7 +120,7 @@ - name: Install latest ruby become: yes become_user: "{{ analytics_user }}" - shell: "export PATH=$PATH:/home/analytics/.rvm/bin && rvm install ruby-2.2" + shell: "export PATH=$PATH:/home/analytics/.rvm/bin && rvm install ruby-2.5" - name: Add ruby repository become: yes @@ -130,7 +130,7 @@ - name: Install latest ruby-dev become: yes apt: - name: "ruby2.2-dev" + name: "ruby2.5-dev" state: installed update_cache: true cache_valid_time: 3600 @@ -138,7 +138,7 @@ - name: Install ruby-kafka become: yes become_user: "{{ analytics_user }}" - shell: "bash -ilc 'export PATH=$PATH:/home/analytics/.rvm/bin && rvm --default use ruby-2.2 && gem install ruby-kafka'" + shell: "bash -ilc 'export PATH=$PATH:/home/analytics/.rvm/bin && rvm --default use ruby-2.5 && gem install ruby-kafka'" - name: Download Kafka-2.11 become: yes From 21f1a857db1893408416049292e1fe20067c6185 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 24 Jan 2023 11:27:05 +1100 Subject: [PATCH 066/203] added variable for endpoint for esclouduploader Signed-off-by: Deepak Devadathan --- .../content-snapshot-indexer/templates/conf/ESCloudUploader.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/ansible/roles/content-snapshot-indexer/templates/conf/ESCloudUploader.j2 b/ansible/roles/content-snapshot-indexer/templates/conf/ESCloudUploader.j2 index 62312f203f..dab231923f 100755 --- a/ansible/roles/content-snapshot-indexer/templates/conf/ESCloudUploader.j2 +++ b/ansible/roles/content-snapshot-indexer/templates/conf/ESCloudUploader.j2 @@ -8,4 +8,5 @@ cloudStorage.container="{{ cloud_storage.container }}" cloudStorage.objectKey="{{ cloud_storage.object_key }}" cloudStorage.provider="{{ cloud_storage.provider }}" cloudStorage.accountName="{{ cloud_storage.account_name }}" -cloudStorage.accountKey="{{ cloud_storage.account_key }}" \ No newline at end of file +cloudStorage.accountKey="{{ cloud_storage.account_key }}" +cloudStorage.accountEndpoint="{{ cloud_storage.account_endpoint }}" \ No newline at end of file From 673d2f1feab42d7389952e2dca92dc07c517c639 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 24 Jan 2023 11:33:35 +1100 Subject: [PATCH 067/203] added variable for etljob endpoint Signed-off-by: Deepak Devadathan --- ansible/roles/content-snapshot-indexer/defaults/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/content-snapshot-indexer/defaults/main.yml b/ansible/roles/content-snapshot-indexer/defaults/main.yml index 40ba1ef8dd..d6c3b6b6aa 100644 --- a/ansible/roles/content-snapshot-indexer/defaults/main.yml +++ b/ansible/roles/content-snapshot-indexer/defaults/main.yml @@ -48,6 +48,7 @@ cloud_storage: provider: "azure" account_name: "{{sunbird_public_storage_account_name}}" account_key: "{{sunbird_public_storage_account_key}}" + account_endpoint: "{{sunbird_public_storage_account_endpoint}}" From f808208a3f7ab9e15433c9c0b90667c7554ab243 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 1 Feb 2023 11:28:50 +1100 Subject: [PATCH 068/203] templated jinja templates for the configured object store (azure/oci/s3) Signed-off-by: Deepak Devadathan --- .../templates/cluster-config.json.j2 | 31 +++++++++++ .../templates/common.conf.j2 | 6 ++- .../templates/model-config.j2 | 51 ++++++++++--------- .../templates/model-config.json.j2 | 26 +++++----- .../templates/model-dock-config.j2 | 8 +-- 5 files changed, 81 insertions(+), 41 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 1a26514684..e899827fdb 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -1,3 +1,5 @@ + +{% if dp_object_store_type == "azure" %} { "jars": [ "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", @@ -25,3 +27,32 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }}
-Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +{ + "jars": [ + "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "s3n://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", + "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "s3n://{{ bucket }}/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} +{% endif %} \ No newline at end of file 
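Note: the azure and oci/s3 branches above duplicate the entire Spark job submission JSON and differ only in the URL scheme of the jar and file paths (wasbs:// for azure, s3n:// for oci and s3). A minimal sketch of an equivalent, more compact guard, assuming only the dp_object_store_type variable this patch already relies on; Jinja2's membership test replaces the chained or-comparison:

{% if dp_object_store_type == "azure" %}
{# wasbs:// jar/file paths and conf block, as in the first branch above #}
{% elif dp_object_store_type in ["oci", "s3"] %}
{# s3n:// jar/file paths and conf block, as in the second branch above #}
{% endif %}

Both forms render identically for "azure", "oci" and "s3"; the list form only avoids repeating the variable name if further s3-compatible providers are added later.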
diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index bde88ec9d4..bec3d21d81 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -251,8 +251,12 @@ dcetextbook.filename="DCE_textbook_data.csv" etbtextbook.filename="ETB_textbook_data.csv" etb.dialcode.druid.length={{ etb_dialcode_list_druid_length }} - +{% if dp_object_store_type == "azure" %} druid.report.default.storage="azure" +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +druid.report.default.storage="s3" +{% endif %} + druid.report.date.format="yyyy-MM-dd" druid.report.default.container="report-verification" diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 2bb0a042ea..8b238e8ef2 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -30,19 +30,24 @@ config() { if [ ! -z "$2" ]; then keyword=$2; fi case "$1" in "assessment-correction") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' ;; "assessment-archival") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"azure","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' ;; "assessment-archived-removal") 
+{% if dp_object_store_type == "azure" %} echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"s3","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + ;; +{% endif %} "collection-reconciliation-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CollectionReconciliationJob","modelParams":{"mode":"prodrun","brokerList":"{{ingestion_kafka_broker_host}}","topic":"{{env}}.issue.certificate.request","sparkCassandraConnectionHost":"{{ core_cassandra_host }}"},"parallelization":30,"appName":"CollectionReconciliationJob"}' ;; "collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"azure","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "score-metric-migration-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.ScoreMetricMigrationJob","modelParams":{"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Score Metric Migration Job"}' @@ -51,34 +56,34 @@ config() { echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.AssessmentScoreCorrectionJob","modelParams":{"assessment.score.correction.batches":"","cassandraReadConsistency":"QUORUM","cassandraWriteConsistency":"QUORUM","csvPath":"/mount/data/analytics/score_correction","isDryRunMode":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":30,"appName":"Assessment Score Correction Job"}' ;; "course-batch-status-updater") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"azure","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' ;; "collection-summary-report-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"azure","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection 
Summary Report V2"}' ;; "uci-private-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' ;; "uci-response-exhaust") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"azure","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' ;; "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' ;; "program-collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"azure","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host 
}}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "response-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' ;; "response-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date 
yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"azure","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' ;; "druid_reports") echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.DruidQueryProcessingModel","modelParams":{"mode":"batch"},"parallelization":8,"appName":"Druid Reports"}' @@ -94,10 +99,10 @@ config() { ;; "wfs") echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"} }],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' - #echo 
'{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' + #echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' ;; "video-streaming") - echo '{"search":{"type":"azure"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' + echo '{"search":{"type":"{{ dp_object_store_type }}"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' ;; "admin-user-reports") echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' @@ -106,10 +111,10 @@ config() { echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' ;; "telemetry-replay") - echo '{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' ;; "summary-replay") - echo 
'{"search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' ;; "content-rating-updater") echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.updater.UpdateContentRating","modelParams": {"startDate": "'$endDate'","endDate": "'$endDate'"},"output": [{"to":"console","params":{"printEvent":false}}],"parallelization": 8,"appName": "Content Rating Updater","deviceMapping": false}' @@ -118,25 +123,25 @@ config() { echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.ExperimentDefinitionModel","modelParams":{"sparkElasticsearchConnectionHost":"{{ lp_composite_search_host }}"},"output":[{"to":"elasticsearch","params":{"index":"experiment"}}],"parallelization":8,"appName":"Experiment-Definition","deviceMapping":false}' ;; "etb-metrics") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of 
Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"azure","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of 
Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' ;; "course-enrollment-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"azure","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch 
Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' ;; "course-consumption-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": "sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "azure","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": 
"sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "{{ dp_object_store_type }}","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' ;; "textbook-progress-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"azure","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created 
By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' ;; "audit-metrics-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"azure","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"azure","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"azure","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"{{ dp_object_store_type 
}}","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' ;; "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "azure","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' ;; "druid-dataset") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"azure","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' ;; "*") echo "Unknown model code" diff --git a/ansible/roles/data-products-deploy/templates/model-config.json.j2 b/ansible/roles/data-products-deploy/templates/model-config.json.j2 index 4594a1978a..a3569c7f46 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.json.j2 @@ -1,7 +1,7 @@ { "wfs": { "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -24,7 +24,7 @@ }, "output": [ { - "to": "azure", + "to": "{{dp_object_store_type}}", "params": { "bucket": "{{ bucket }}", "key": "{{ job_manager_tmp_dir }}/wfs/$(date --date yesterday '+%Y-%m-%d')" @@ -44,7 +44,7 @@ }, "video-streaming": { "search": { - "type": "azure" + "type": "{{dp_object_store_type}}" }, "model": "org.ekstep.analytics.job.VideoStreamingJob", "modelParams": { @@ -297,7 +297,7 @@ "tenantId": "", "slugName": "" }, - "store": "azure", + "store": "{{dp_object_store_type}}", "format": "csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -346,7 +346,7 @@ "limit": 10000 } }, - "store": "azure", + "store": "{{dp_object_store_type}}", "format":"csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -457,7 +457,7 @@ }], "queryType": "groupBy" }, - "store": "azure", + "store": "{{dp_object_store_type}}", "format":"csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -486,7 +486,7 @@ { "name": "denorm", "search": { - "type": "azure", + "type": 
"{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -532,7 +532,7 @@ { "name": "failed", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -546,7 +546,7 @@ { "name": "unique", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -560,7 +560,7 @@ { "name": "raw", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", @@ -574,7 +574,7 @@ { "name": "channel-raw", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "folder": true, @@ -589,7 +589,7 @@ { "name": "channel-summary", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "folder": true, @@ -604,7 +604,7 @@ { "name": "derived", "search": { - "type": "azure", + "type": "{{dp_object_store_type}}", "queries": [ { "bucket": "{{ bucket }}", diff --git a/ansible/roles/data-products-deploy/templates/model-dock-config.j2 b/ansible/roles/data-products-deploy/templates/model-dock-config.j2 index 20d82dbfb5..f720f4687e 100644 --- a/ansible/roles/data-products-deploy/templates/model-dock-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-dock-config.j2 @@ -15,16 +15,16 @@ config() { if [ ! -z "$3" ]; then inputBucket=$3; fi case "$1" in "content-details") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content 
Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","aliasName":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"azure","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project 
ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","aliasName":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question 
Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{dp_object_store_type}}","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' ;; "sourcing-summary-report") - echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": "'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": "/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "azure", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' + echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": "'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": "/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": 
"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "{{dp_object_store_type}}", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' ;; "funnel-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. of contributions pending review","approvedContributions": "No. 
of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "azure","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": "equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. of contributions pending review","approvedContributions": "No. 
of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "{{dp_object_store_type}}","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": "equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' ;; "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "azure","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{dp_object_store_type}}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' ;; "*") echo "Unknown model code" From 7eff73f93b1d90949b45033ec6ac8929720967b3 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 1 Feb 2023 14:44:34 +1100 Subject: [PATCH 069/203] updated storage type info Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index bec3d21d81..c86cf0ef10 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -17,9 +17,13 @@ reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" {% if dp_object_store_type == "azure" %} cloud_storage_type="azure" {% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3") %} +{% if cloud_service_provider == "oci" %} +cloud_storage_type="oci" +{% else %} cloud_storage_type="s3" +{% endif %} cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" -cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint }}" +cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}" aws_storage_key="{{ s3_storage_key }}" aws_storage_secret="{{ s3_storage_secret }}" {% endif %} From 6910f88c86b921295d625a2b17b8203332aa648c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 1 Feb 2023 17:04:12 +1100 Subject: [PATCH 070/203] updated the store type as template value Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index c86cf0ef10..eb56426862 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -16,7 +16,7 @@ reports.storage.key.config="{{ dp_reports_storage_key_config }}" reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" {% if dp_object_store_type == "azure" %} cloud_storage_type="azure" -{% elif (dp_object_store_type 
== "cephs3" or dp_object_store_type == "s3") %} +{% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3" or dp_object_store_type == "oci") %} {% if cloud_service_provider == "oci" %} cloud_storage_type="oci" {% else %} diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 8b238e8ef2..86f376b65d 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -40,7 +40,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; {% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"s3","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; {% endif %} "collection-reconciliation-job") From 24ab958ac86f0289631b6c20d001eb5e615048ce Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 1 Feb 2023 17:26:39 +1100 Subject: [PATCH 071/203] added the endpoint variable for jobmanager Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index eb56426862..e0ec7005df 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -24,6 +24,7 @@ cloud_storage_type="s3" {% endif %} cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}" +storage.endpoint.config="{{ s3_storage_endpoint_with_protocol }}" aws_storage_key="{{ s3_storage_key }}" aws_storage_secret="{{ s3_storage_secret }}" {% endif %} From 458acc444521b41965b6a4701bb4289611e0dfa7 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 2 Feb 2023 10:40:36 +1100 Subject: [PATCH 072/203] updated the condition of oss upload Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index f4cbf7c216..c659f75113 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -8,13 +8,13 @@ - name: Ensure oci oss bucket exists command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" register: check_bucket - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - always - name: Create oci oss bucket command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" and check_bucket.rc !=0 + when: dp_object_store_type == "oci" and check_bucket.rc !=0 tags: - always @@ -35,7 +35,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - dataproducts-spark-cluster @@ -57,7 +57,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - ed-dataproducts-spark-cluster @@ -78,7 +78,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - framework-spark-cluster @@ -99,7 +99,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - framework-spark-cluster @@ -157,7 +157,7 @@ command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force async: 3600 poll: 10 - when: dp_object_store_type == "s3" and cloud_service_provider == "oci" + when: dp_object_store_type == "oci" tags: - framework-spark-cluster From 14f2a405e157f9ac562eb0b59e54bc6a2acb0f7c Mon Sep 17 00:00:00 2001 From: subhash_chandra_budde Date: Mon, 13 Feb 2023 19:21:22 +0530 Subject: [PATCH 073/203] Added oci instance principal env var --- ansible/artifacts-upload.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ansible/artifacts-upload.yml b/ansible/artifacts-upload.yml index 3bc192e194..ee268ef30b 100644 --- a/ansible/artifacts-upload.yml +++ b/ansible/artifacts-upload.yml @@ -43,6 +43,9 @@ - name: upload artifact to oci oss include_role: name: oci-cloud-storage + apply: + environment: + OCI_CLI_AUTH: "instance_principal" tasks_from: upload.yml vars: local_file_or_folder_path: 
"{{ artifact_path }}" From 24920cc7799e48f1e71893c6b13adfb5438ed732 Mon Sep 17 00:00:00 2001 From: subhash_chandra_budde Date: Tue, 14 Feb 2023 09:36:04 +0530 Subject: [PATCH 074/203] Added oci instance prinicipal env var --- ansible/artifacts-download.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ansible/artifacts-download.yml b/ansible/artifacts-download.yml index e216c10999..8597646204 100644 --- a/ansible/artifacts-download.yml +++ b/ansible/artifacts-download.yml @@ -43,6 +43,9 @@ - name: download artifact from oci oss include_role: name: oci-cloud-storage + apply: + environment: + OCI_CLI_AUTH: "instance_principal" tasks_from: download.yml vars: local_file_or_folder_path: "{{ artifact_path }}" From f546aea9eef239540a18ce4a9d4176f7b90259ff Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 22 Feb 2023 23:06:10 +1100 Subject: [PATCH 075/203] added oci instance principal variable Signed-off-by: Deepak Devadathan --- ansible/lpa_data-products_deploy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/lpa_data-products_deploy.yml b/ansible/lpa_data-products_deploy.yml index 7e84ba53f7..1ff0cbdabc 100644 --- a/ansible/lpa_data-products_deploy.yml +++ b/ansible/lpa_data-products_deploy.yml @@ -7,5 +7,6 @@ environment: AZURE_STORAGE_ACCOUNT: "{{sunbird_private_storage_account_name}}" AZURE_STORAGE_KEY: "{{sunbird_private_storage_account_key}}" + OCI_CLI_AUTH: "instance_principal" roles: - data-products-deploy From 6813a3ba65f5448b7348ee711a64d97cb178f0ab Mon Sep 17 00:00:00 2001 From: subhash_chandra_budde Date: Thu, 23 Feb 2023 14:54:46 +0530 Subject: [PATCH 076/203] python version upgrade for Ubuntu 22.04 --- ansible/roles/portal-dashboard/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/portal-dashboard/tasks/main.yml b/ansible/roles/portal-dashboard/tasks/main.yml index adfbd76b39..f822cd3881 100644 --- a/ansible/roles/portal-dashboard/tasks/main.yml +++ b/ansible/roles/portal-dashboard/tasks/main.yml @@ -12,7 +12,7 @@ pip: name: "{{library_path}}" virtualenv: "{{ virtualenv_path }}" - virtualenv_python: "python3.6" + virtualenv_python: "python3.10" tags: - common From 3970257c8e9f59fa509c5f0849862e0825916878 Mon Sep 17 00:00:00 2001 From: subhash_chandra_budde Date: Thu, 23 Feb 2023 15:08:40 +0530 Subject: [PATCH 077/203] python version upgrade for Ubuntu 22.04 --- ansible/roles/portal-dashboard/tasks/main.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ansible/roles/portal-dashboard/tasks/main.yml b/ansible/roles/portal-dashboard/tasks/main.yml index f822cd3881..ac2869febb 100644 --- a/ansible/roles/portal-dashboard/tasks/main.yml +++ b/ansible/roles/portal-dashboard/tasks/main.yml @@ -8,11 +8,16 @@ tags: - common +- name: Install virtualenv via pip + pip: + name: virtualenv + executable: pip3 + - name: Installing package pip: name: "{{library_path}}" virtualenv: "{{ virtualenv_path }}" - virtualenv_python: "python3.10" + virtualenv_python: "python3.6" tags: - common From e743f81d70fbc1ffc4fbd1778462bc45cc7e95b3 Mon Sep 17 00:00:00 2001 From: subhash_chandra_budde Date: Thu, 23 Feb 2023 15:29:06 +0530 Subject: [PATCH 078/203] python version upgrade for Ubuntu 22.04 --- ansible/roles/portal-dashboard/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/portal-dashboard/tasks/main.yml b/ansible/roles/portal-dashboard/tasks/main.yml index ac2869febb..5d3bb3bda8 100644 --- a/ansible/roles/portal-dashboard/tasks/main.yml +++ 
b/ansible/roles/portal-dashboard/tasks/main.yml @@ -17,7 +17,7 @@ pip: name: "{{library_path}}" virtualenv: "{{ virtualenv_path }}" - virtualenv_python: "python3.6" + # virtualenv_python: "python3.6" tags: - common From 0cd35ac28f971cf58235d91dfa3147e5f4073bb4 Mon Sep 17 00:00:00 2001 From: subhash_chandra_budde Date: Thu, 23 Feb 2023 15:32:42 +0530 Subject: [PATCH 079/203] removing virtualenv_python value in install package to pick the default python --- ansible/roles/portal-dashboard/tasks/main.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ansible/roles/portal-dashboard/tasks/main.yml b/ansible/roles/portal-dashboard/tasks/main.yml index 5d3bb3bda8..7c53a72307 100644 --- a/ansible/roles/portal-dashboard/tasks/main.yml +++ b/ansible/roles/portal-dashboard/tasks/main.yml @@ -8,11 +8,6 @@ tags: - common -- name: Install virtualenv via pip - pip: - name: virtualenv - executable: pip3 - - name: Installing package pip: name: "{{library_path}}" From 8de25b9e9fd052f5aca3152e6a588679e443423c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 20 Mar 2023 10:52:45 +1100 Subject: [PATCH 080/203] added postgres ssl mode Signed-off-by: Deepak Devadathan --- .../main/scala/org/sunbird/dp/core/util/PostgresConnect.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index 0705498767..20651cdcc9 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -26,6 +26,9 @@ class PostgresConnect(config: PostgresConnectionConfig) { source.setPassword(config.password) source.setDatabaseName(config.database) source.setMaxConnections(config.maxConnections) + source.setSsl(true); + source.setSslMode("require"); + source.setSslFactory("org.postgresql.ssl.NonValidatingFactory"); } @throws[Exception] From b4e61c6c00bb68f692263202521a2dc0de0bd4e2 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 23 Mar 2023 16:00:06 +1100 Subject: [PATCH 081/203] added ssl property for pg connection Signed-off-by: Deepak Devadathan --- data-pipeline-flink/dp-core/pom.xml | 9 +++++++-- .../scala/org/sunbird/dp/core/util/PostgresConnect.scala | 7 ++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/data-pipeline-flink/dp-core/pom.xml b/data-pipeline-flink/dp-core/pom.xml index cfe8164c5f..a33c0f93e4 100644 --- a/data-pipeline-flink/dp-core/pom.xml +++ b/data-pipeline-flink/dp-core/pom.xml @@ -64,11 +64,16 @@ cassandra-driver-core 3.7.0 - + + + org.postgresql + postgresql + 42.6.0 + org.scalatest scalatest_2.12 diff --git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index 20651cdcc9..8d3ae14b83 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -3,6 +3,7 @@ package org.sunbird.dp.core.util import java.sql.{Connection, ResultSet, SQLException} import org.postgresql.ds.PGPoolingDataSource +import org.postgresql.ssl.NonValidatingFactory final case class PostgresConnectionConfig(user: String, password: String, database: String, host: String, port: Int, maxConnections: Int) @@ -26,9 
+27,9 @@ class PostgresConnect(config: PostgresConnectionConfig) { source.setPassword(config.password) source.setDatabaseName(config.database) source.setMaxConnections(config.maxConnections) - source.setSsl(true); - source.setSslMode("require"); - source.setSslFactory("org.postgresql.ssl.NonValidatingFactory"); + source.setProperty("ssl", "true") + source.setProperty("sslmode", "require") + source.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory") } @throws[Exception] From 5515f7769ce0d96e5dd715fce0249d02258d5538 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 23 Mar 2023 16:37:39 +1100 Subject: [PATCH 082/203] added conditional selection of pg db name Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index cbafb57c22..aec5c41795 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -594,7 +594,13 @@ device-profile-updater: port = {{ redis_meta_device_port }} } postgres { + +{% if dp_ssl == "true" %} + database = "{{ postgres.dp_db_name }}", +{% else %} database = "{{ postgres.db_name }}", +{% endif %} + table = "{{ device_profile_table }}" } From b21d7d26f5f85bdac35b54503decb576f413b429 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 23 Mar 2023 16:38:59 +1100 Subject: [PATCH 083/203] updated the condition check Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index aec5c41795..04dba56e4b 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -595,7 +595,7 @@ device-profile-updater: } postgres { -{% if dp_ssl == "true" %} +{% if postgres.dp_ssl == "true" %} database = "{{ postgres.dp_db_name }}", {% else %} database = "{{ postgres.db_name }}", From 419f9f4b7ce65e5a42306adb5d2c0286377188a1 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 23 Mar 2023 16:49:00 +1100 Subject: [PATCH 084/203] changed the condition Signed-off-by: Deepak Devadathan --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 04dba56e4b..4122f9ea07 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -595,7 +595,7 @@ device-profile-updater: } postgres { -{% if postgres.dp_ssl == "true" %} +{% if postgres.dp_ssl_mode == "require" %} database = "{{ postgres.dp_db_name }}", {% else %} database = "{{ postgres.db_name }}", From ddadf4ca887bb63d0855308dfedeeef4453db58e Mon Sep 17 00:00:00 2001 From: ali_shemshadi Date: Fri, 7 Apr 2023 12:18:46 +0800 Subject: [PATCH 085/203] bring bds changes from local repository --- ansible/oci-bds-spark.provision.yml | 18 + .../data-products-deploy/defaults/main.yml | 8 +- .../roles/data-products-deploy/tasks/main.yml | 14 +- .../templates/cluster-config.json.j2 | 30 +- .../templates/submit-script.j2 | 39 ++- .../oci-bds-spark-cluster/defaults/main.yml | 7 + .../oci-bds-spark-cluster/tasks/main.yml | 13 + .../templates/create-cluster-with-sleep.sh.j2 | 331 
++++++++++++++++++ .../templates/create-cluster.sh.j2 | 329 +++++++++++++++++ .../templates/delete-cluster.sh.j2 | 34 ++ .../defaults/main.yml | 39 +++ .../tasks/main.yml | 89 +++++ .../deploy/spark-cluster-deploy/Jenkinsfile | 2 +- pipelines/provision/spark/Jenkinsfile.bds | 57 +++ .../provision/spark/Jenkinsfile.bds.test | 60 ++++ pipelines/provision/spark/Jenkinsfile.delete | 16 +- 16 files changed, 1063 insertions(+), 23 deletions(-) create mode 100644 ansible/oci-bds-spark.provision.yml create mode 100644 ansible/roles/oci-bds-spark-cluster/defaults/main.yml create mode 100644 ansible/roles/oci-bds-spark-cluster/tasks/main.yml create mode 100755 ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 create mode 100644 ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 create mode 100755 ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 create mode 100644 ansible/roles/provision-oci-spark-cluster/defaults/main.yml create mode 100644 ansible/roles/provision-oci-spark-cluster/tasks/main.yml create mode 100644 pipelines/provision/spark/Jenkinsfile.bds create mode 100644 pipelines/provision/spark/Jenkinsfile.bds.test diff --git a/ansible/oci-bds-spark.provision.yml b/ansible/oci-bds-spark.provision.yml new file mode 100644 index 0000000000..4eb80aa296 --- /dev/null +++ b/ansible/oci-bds-spark.provision.yml @@ -0,0 +1,18 @@ +- hosts: local + become: yes + vars_files: + - "{{inventory_dir}}/secrets.yml" + roles: + - oci-bds-spark-cluster + tags: + - copy-script + +- hosts: bds-livy-node + become: yes + gather_facts: no + vars_files: + - "{{inventory_dir}}/secrets.yml" + roles: + - provision-oci-spark-cluster + tags: + - spark-provision diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml index 690c51d87d..7eb22c7a18 100755 --- a/ansible/roles/data-products-deploy/defaults/main.yml +++ b/ansible/roles/data-products-deploy/defaults/main.yml @@ -4,7 +4,7 @@ spark_output_temp_dir: /mount/data/analytics/tmp/ bucket: "telemetry-data-store" secor_bucket: "telemetry-data-store" -dp_object_store_type: "azure" +dp_object_store_type: "oci" dp_raw_telemetry_backup_location: "unique/raw/" dp_storage_key_config: "azure_storage_key" dp_storage_secret_config: "azure_storage_secret" @@ -210,9 +210,9 @@ admin_password: "{{ spark_cluster_user_password }}" spark_cluster_name: "{{env}}-spark-cluster" spark_cluster: - executor_core: 5 - executor_memory: 19G - num_executors: 5 + executor_core: 1 + executor_memory: 2G + num_executors: 1 analytics_cluster: home: "/tmp" diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index c659f75113..733c416138 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -6,14 +6,14 @@ - always - name: Ensure oci oss bucket exists - command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" + command: "oci os bucket get --name {{ bucket }}" register: check_bucket when: dp_object_store_type == "oci" tags: - always - name: Create oci oss bucket - command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + command: "oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" when: dp_object_store_type == "oci" and check_bucket.rc !=0 tags: - always @@ -32,7 +32,7 @@ - dataproducts-spark-cluster - name: Copy Core Data Products to oci oss - command: 
/home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -54,7 +54,7 @@ - ed-dataproducts-spark-cluster - name: Copy Ed Data Products to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -75,7 +75,7 @@ - framework-spark-cluster - name: Copy Framework Library to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -96,7 +96,7 @@ - framework-spark-cluster - name: Copy Scruid Library to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -154,7 +154,7 @@ - framework-spark-cluster - name: Copy configuration file to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force + command: oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force async: 3600 poll: 10 when: dp_object_store_type == "oci" diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index e899827fdb..12ebf0bde0 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -27,7 +27,7 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties 
-Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } -{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +{% elif (dp_object_store_type == "s3") %} { "jars": [ "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", @@ -55,4 +55,32 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } +{% elif (dp_object_store_type == "oci") %} +{ + "jars": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties 
-Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} {% endif %} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy/templates/submit-script.j2 b/ansible/roles/data-products-deploy/templates/submit-script.j2 index e8341dc1e8..edd03ff36b 100644 --- a/ansible/roles/data-products-deploy/templates/submit-script.j2 +++ b/ansible/roles/data-products-deploy/templates/submit-script.j2 @@ -1,6 +1,7 @@ #!/usr/bin/env bash ## Job to run daily + cd "{{ analytics_cluster.home }}" source model-config.sh today=$(date "+%Y-%m-%d") @@ -79,7 +80,15 @@ submit_cluster_job() { requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody - response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: admin_name }}") +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} echo "Submitted job for batchNumer $i below is the response" echo $response } @@ -118,7 +127,15 @@ if [ "$mode" = "via-partition" ]; then requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 
'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
+}
+{% elif (dp_object_store_type == "oci") %}
+{
+    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}"
+}
+{% endif %}
 done
 
 elif [ "$mode" = "parallel-jobs" ]; then
@@ -157,8 +174,15 @@ elif [ "$mode" = "selected-partition" ]; then
         requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
         finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
         echo $finalRequestBody
+{% if dp_object_store_type == "azure" %}
+{
 curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
-
+}
+{% elif (dp_object_store_type == "oci") %}
+{
+    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}"
+}
+{% endif %}
 else
     if [ -z "$start_date" ]; then
        echo "Running $job without partition via run-job."
@@ -179,5 +203,14 @@ else
         requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
         finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
         echo $finalRequestBody
-    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
+{% if dp_object_store_type == "azure" %}
+{
+    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
+}
+{% elif (dp_object_store_type == "oci") %}
+{
+    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}"
+}
+{% endif %}
+
 fi
diff --git a/ansible/roles/oci-bds-spark-cluster/defaults/main.yml b/ansible/roles/oci-bds-spark-cluster/defaults/main.yml
new file mode 100644
index 0000000000..95b0b73e0d
--- /dev/null
+++ b/ansible/roles/oci-bds-spark-cluster/defaults/main.yml
@@ -0,0 +1,7 @@
+
+spark_folder: /usr/hdp/current/spark2-client
+guava_version: 19.0
+log4j_version: 2.5
+guava_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_version}}/guava-{{guava_version}}.jar
+log4j_core_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-core/{{log4j_version}}/log4j-core-{{log4j_version}}.jar
+log4j_api_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-api/{{log4j_version}}/log4j-api-{{log4j_version}}.jar
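+# NOTE (illustrative): the jar versions above are pinned for the BDS Spark
+# client; if the cluster image ships different stock jars they can be
+# overridden per environment, e.g.:
+#   ansible-playbook ... -e "guava_version=19.0 log4j_version=2.5"
diff --git a/ansible/roles/oci-bds-spark-cluster/tasks/main.yml b/ansible/roles/oci-bds-spark-cluster/tasks/main.yml
new file mode 100644
index 0000000000..d8f4d3cc50
--- /dev/null
+++ b/ansible/roles/oci-bds-spark-cluster/tasks/main.yml
@@ -0,0 +1,13 @@
+- name: copy cluster creation script
+  template: 
+    src: create-cluster.sh.j2
+    dest: 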
/tmp/create-cluster.sh + mode: 0755 + when: cluster_state == "create_cluster" + +- name: copy cluster deletion script + template: + src: delete-cluster.sh.j2 + dest: /tmp/delete-cluster.sh + mode: 0755 + when: cluster_state == "delete_cluster" diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 new file mode 100755 index 0000000000..54784435d7 --- /dev/null +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 @@ -0,0 +1,331 @@ +#! /bin/bash +# Subnet id will generate from env variable + +ambari_user="{{ambari_user}}" +cluster_password="{{cluster_password}}" +key_alias="{{key_alias}}" +user_id="{{user_id}}" +subnet="{{subnet_id}}" +compartment_id="{{compartment_id}}" +display_name="{{display_name}}" +workernode="{{workernode}}" +cluster_public_key="{{public_key}}" + +AMBARI_USER=$ambari_user +AMBARI_PWD=$cluster_password + +function get_bdsid() { + list_param=`oci bds instance list --compartment-id $compartment_id` + bdsid="NULL" + # echo $list_param | jq '.data' + state="ACTIVE" + disname="NULL" + for k in $(jq '.data | keys | .[]' <<< "$list_param"); do + # echo $k + cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'` + if [ $cstate = $state ]; then + disname=`echo $list_param | jq -r '.data['$k']["display-name"]'` + if [ $disname = $display_name ]; then + bdsid=`echo $list_param | jq -r '.data['$k']["id"]'` + fi + + fi + echo "BDS ID" + echo $bdsid + done +} + +function getLivyip() { + + export bds_instance_id=$bdsid + bdsjson=$(oci bds instance get --bds-instance-id $bds_instance_id) + # echo "AMBARI URL" + ambari_url=`echo $bdsjson | jq -r '.data["cluster-details"]["ambari-url"]'` + # echo $ambari_url + livyip="NULL" + cnode="UTILITY" + for k in $(jq '.data["nodes"] | keys | .[]' <<< "$bdsjson"); do + node=`echo $bdsjson | jq -r '.data["nodes"]['$k']["node-type"]'` + if [ $node = "$cnode" ]; then + livyip=`echo $bdsjson | jq -r '.data["nodes"]['$k']["ip-address"]'` + fi + done + echo "LIVY IP" + echo $livyip + +} + +getlivyclustername() { + cdet=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$livyip:7183/api/v1/clusters/) + echo $cdet + for k in $(jq '.items | keys | .[]' <<< "$cdet"); do + # echo $k + cluster_name=`echo $cdet | jq -r '.items['$k']["Clusters"]["cluster_name"]'` + echo $cluster_name + done + echo "CLUSTER NAME" + +} + +function get_apidetails() { + + export bds_instance_id=$bdsid + + listapijson=$(oci bds bds-api-key list --bds-instance-id $bds_instance_id) + + #echo $listapijson | jq '.data[1]["key-alias"]' + id="NULL" + ctype="ACTIVE" + for k in $(jq '.data | keys | .[]' <<< "$listapijson"); do + type=`echo $listapijson | jq -r '.data['$k']["lifecycle-state"]'` + if [ $type = "$ctype" ]; then + id=`echo $listapijson | jq -r '.data['$k']["id"]'` + fi + done + + echo $id + + export api_key_id=$id + + list_api=`oci bds bds-api-key get --api-key-id $api_key_id --bds-instance-id $bds_instance_id` + + #echo $list_api | jq '.data' + + data=`echo $list_api | jq '.data'` + echo "API DETAILS" + echo $data + region=`echo $list_api | jq -r '.data["default-region"]'` + fingerprint=`echo $list_api | jq -r '.data["fingerprint"]'` + keyalias=`echo $list_api | jq -r '.data["key-alias"]'` + lifecyc=`echo $list_api | jq -r '.data["lifecycle-state"]'` + tm=`echo $list_api | jq -r '.data["time-created"]'` + usid=`echo $list_api | jq -r '.data["user-id"]'` + tenid=`echo $list_api | jq -r '.data["tenant-id"]'` + 
pemfilepath=`echo $list_api | jq -r '.data["pemfilepath"]'`
+
+}
+
+function update_bds_config(){
+    #change below variables for your cluster
+    CONFIG_FILE_TO_UPDATE=""
+
+    #Used for restarting components after config update
+    #Wait time before we poll for restart status. Default 30 seconds. Meaning, we poll for restart status every 30 seconds
+    WAIT_TIME_IN_SEC=30
+
+    #No of tries before we give up on the restart status. Default 20. With the default WAIT_TIME_IN_SEC of 30, at max we wait 10 minutes (20*30 = 600 seconds) before we give up.
+    RETRY_COUNT=20
+
+    #INTERNAL USE ONLY
+    propObj=""
+
+    get_apidetails
+    getUtilityNodesIps=$livyip
+    getlivyclustername
+    echo $getUtilityNodesIps
+    getClusterName=$cluster_name
+    for utilityNodeIp in $getUtilityNodesIps
+    do
+        echo "Current utility node ip: $utilityNodeIp"
+        str1=$(nslookup $utilityNodeIp | awk -v var=$utilityNodeIp '/name =/{print var "\t", $4}')
+        CONFIG_FILE_TO_UPDATE="core-site" #this is the file we're updating in this example
+        propObj=$(get_property_json)
+        echo $propObj
+        echo "calling add properties"
+
+        #update key value pairs. Multiple key value pairs can be updated before doing update_ambari_config
+        add_properties "fs.oci.client.auth.fingerprint" $fingerprint
+        add_properties "fs.oci.client.auth.passphrase" $passphrase
+        add_properties "fs.oci.client.auth.pemfilepath" $pemfilepath
+        add_properties "fs.oci.client.auth.tenantId" $tenid
+        add_properties "fs.oci.client.auth.userId" $usid
+        add_properties "fs.oci.client.regionCodeOrId" $region
+        #Update it to ambari
+        echo "updating ambari config"
+        update_ambari_config
+
+        # echo "restarting all required components"
+        # restart_required_components
+
+    done
+
+}
+
+
+#Method to collect the current config
+function get_property_json(){
+    allConfs=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName?fields=Clusters/desired_configs) #to get all the configs
+    currVersionLoc=".Clusters.desired_configs.\"$CONFIG_FILE_TO_UPDATE\".tag" #fetching current version for property
+    propVersion=$(echo $allConfs | jq $currVersionLoc | tr -d '"')
+    propJson=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X GET "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/configurations?type=$CONFIG_FILE_TO_UPDATE&tag=$propVersion") #fetch property json
+    propLoc=".items[].properties"
+    propKeyVal=$(echo $propJson | jq $propLoc)
+    propObj="{\"properties\":$propKeyVal}"
+    echo $propObj
+}
+
+#Method to add/update key value pair to existing config
+function add_properties(){
+    echo $1 $2
+    echo $propObj
+    propObj=$(echo $propObj | jq '.properties += { "'$1'": "'$2'" }')
+    echo $propObj
+}
+
+#Method to update config in ambari
+function update_ambari_config(){
+    parseableAddedProp=$(echo $propObj | jq '.properties')
+    echo $parseableAddedProp
+    timestamp=$(date +%s)
+    newVersion="version$timestamp"
+    finalJson='[{"Clusters":{"desired_config":[{"type":"'$CONFIG_FILE_TO_UPDATE'","tag":"'$newVersion'","properties":'$parseableAddedProp'}]}}]'
+    echo "CALLING AMBARI API"
+    response_body_amb=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X PUT -d "$finalJson" "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName")
+    echo $response_body_amb
+    echo "DONE AMBARI API"
+}
+
+#Method to restart required components
+function restart_required_components(){
+    echo "restarting all required components"
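+    # Ambari bulk operation: ask Ambari to RESTART every host component whose
+    # config is stale (hosts_predicate HostRoles/stale_configs=true), then poll
+    # the returned request id every WAIT_TIME_IN_SEC up to RETRY_COUNT times
+    response_body=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X POST -d 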
'{"RequestInfo":{"command":"RESTART","context":"Restart all required services from bootstrap script","operation_level":"host_component"},"Requests/resource_filters":[{"hosts_predicate":"HostRoles/stale_configs=true&HostRoles/cluster_name='$getClusterName'"}]}' "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests") + + echo "printing response_body: $response_body" + + idLoc=".Requests.id" + requestId=$(echo $response_body | jq $idLoc) + echo "request id is : $requestId" + + current_count=0 + while [[ $current_count -lt $RETRY_COUNT ]]; + do + current_count=$((current_count+1)) + response=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests/$requestId) + request_status=$(echo $response | jq -r ".Requests.request_status") + echo "printing request_status: $request_status" + if [[ $request_status == "IN_PROGRESS" ]] || [[ $request_status == "PENDING" ]]; then + echo "current_count is : $current_count" + sleep $WAIT_TIME_IN_SEC + elif [[ $request_status == "COMPLETED" ]]; then + echo "Restart successful" + break + fi + done +} + +function creat_api(){ + export bds_instance_id=$bdsid + export key_alias=$key_alias # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-key-alias + export passphrase=$b64p # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-passphrase + export user_id=$user_id + oci bds bds-api-key create --bds-instance-id $bds_instance_id --key-alias $key_alias --passphrase $passphrase --user-id $user_id +} + +function restart_bds_cluster() { + # oci cli command to stop + echo "STOPPING CLUSTER" + oci bds instance stop --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --is-force-stop-jobs true + sleep 10m + # oci cli command to start + echo "STARTING CLUSTER" + oci bds instance start --bds-instance-id $bds_instance_id --cluster-admin-password $b64p + sleep 15m +} + + +# Below is tenancy + +function create_cluster() { + + export compartment_id=$compartment_id + + master=1 + utility=1 + + worker=$workernode # This has to be replaced with Jenkins Paramter + + # Begin script in case all parameters are correct + echo "Generating json woth $master master ndoes $utility utility nodes and $worker worker nodes" + json="[" + + for i in `seq 1 $master` + do + json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + done + + for i in `seq 1 $utility` + do + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + done + + for i in `seq 1 $worker` + do + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 16, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + done + + json="$json]" + printf "$json" > "nodes.json" + echo "File successfully generated and saved as nodes.json" + + echo "CREATING THE BDS CLUSTER" + + export cluster_public_key=$public_key + export cluster_version="ODH2_0" + export display_name=$display_name + export is_high_availability='false' + export is_secure='false' + + cmd="oci bds instance create --cluster-admin-password '$b64p' --cluster-public-key '$cluster_public_key' --cluster-version '$cluster_version' 
--compartment-id '$compartment_id' --display-name '$display_name' --is-high-availability $is_high_availability --is-secure $is_secure --nodes file://nodes.json " + echo $cmd + eval "$cmd" + +} + +function replace_host() { + echo "REPLACE THE HOSTS" + echo "" >> {{inventory_dir}}/hosts + echo "[bds-livy-node]" >> {{inventory_dir}}/hosts + echo "$livyip ansible_ssh_user=opc" >> {{inventory_dir}}/hosts + echo "" >> {{inventory_dir}}/hosts + +} + +# MAIN TO START + +b64p=`echo -n $cluster_password | base64` +echo $b64p +echo $compartment_id + +echo "CREATING CLUSTER" + +create_cluster + +echo "WAITING CLUSTER TO CREATE" + +sleep 42m + +echo "FETCHING BDS ID" + +get_bdsid # This sets BDS ID + +echo "GET LIVY-AMBARI IP" + +getLivyip # This will be ambari ip also + +replace_host + +echo "CREATE OBJECT STORAGE API KEY" + +creat_api + +echo "WAITING FOR API TO CREATE" + +sleep 5m + +echo "UPDATE BDS AMBARI CONFIG" + +get_apidetails + +update_bds_config + +restart_bds_cluster \ No newline at end of file diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 new file mode 100644 index 0000000000..e4295dded8 --- /dev/null +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -0,0 +1,329 @@ +#! /bin/bash +# Subnet id will generate from env variable +# Version 1 Running Fine +ambari_user="{{ambari_user}}" +cluster_password="{{cluster_password}}" +key_alias="{{key_alias}}" +user_id="{{user_id}}" +subnet="{{subnet_id}}" +compartment_id="{{compartment_id}}" +display_name="{{display_name}}" +workernode="{{workernode}}" +cluster_public_key="{{public_key}}" + +cstate='SUCCEEDED' +cwait=2500 + +echo "RECEIVED ALL ENV VARIABLES" + +AMBARI_USER=$ambari_user +AMBARI_PWD=$cluster_password + +function get_bdsid() { + list_param=`oci bds instance list --compartment-id $compartment_id` + bdsid="NULL" + # echo $list_param | jq '.data' + state="ACTIVE" + disname="NULL" + for k in $(jq '.data | keys | .[]' <<< "$list_param"); do + # echo $k + cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'` + if [ $cstate = $state ]; then + disname=`echo $list_param | jq -r '.data['$k']["display-name"]'` + if [ $disname = $display_name ]; then + bdsid=`echo $list_param | jq -r '.data['$k']["id"]'` + fi + + fi + echo "BDS ID" + echo $bdsid + done +} + +function getLivyip() { + + export bds_instance_id=$bdsid + bdsjson=$(oci bds instance get --bds-instance-id $bds_instance_id) + # echo "AMBARI URL" + ambari_url=`echo $bdsjson | jq -r '.data["cluster-details"]["ambari-url"]'` + # echo $ambari_url + livyip="NULL" + cnode="UTILITY" + for k in $(jq '.data["nodes"] | keys | .[]' <<< "$bdsjson"); do + node=`echo $bdsjson | jq -r '.data["nodes"]['$k']["node-type"]'` + if [ $node = "$cnode" ]; then + livyip=`echo $bdsjson | jq -r '.data["nodes"]['$k']["ip-address"]'` + fi + done + echo "LIVY IP" + echo $livyip + +} + +getlivyclustername() { + cdet=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$livyip:7183/api/v1/clusters/) + echo $cdet + for k in $(jq '.items | keys | .[]' <<< "$cdet"); do + # echo $k + cluster_name=`echo $cdet | jq -r '.items['$k']["Clusters"]["cluster_name"]'` + echo $cluster_name + done + echo "CLUSTER NAME" + +} + +function get_apidetails() { + + export bds_instance_id=$bdsid + + listapijson=$(oci bds bds-api-key list --bds-instance-id $bds_instance_id) + + #echo $listapijson | jq '.data[1]["key-alias"]' + id="NULL" + ctype="ACTIVE" + for k in $(jq '.data | keys | .[]' <<< 
"$listapijson"); do + type=`echo $listapijson | jq -r '.data['$k']["lifecycle-state"]'` + if [ $type = "$ctype" ]; then + id=`echo $listapijson | jq -r '.data['$k']["id"]'` + fi + done + + echo $id + + export api_key_id=$id + + list_api=`oci bds bds-api-key get --api-key-id $api_key_id --bds-instance-id $bds_instance_id ` + + #echo $list_api | jq '.data' + + data=`echo $list_api | jq '.data'` + echo "API DETAILS" + echo $data + region=`echo $list_api | jq -r '.data["default-region"]'` + fingerprint=`echo $list_api | jq -r '.data["fingerprint"]'` + keyalias=`echo $list_api | jq -r '.data["key-alias"]'` + lifecyc=`echo $list_api | jq -r '.data["lifecycle-state"]'` + tm=`echo $list_api | jq -r '.data["time-created"]'` + usid=`echo $list_api | jq -r '.data["user-id"]'` + tenid=`echo $list_api | jq -r '.data["tenant-id"]'` + pemfilepath=`echo $list_api | jq -r '.data["pemfilepath"]'` + +} + +function update_bds_config(){ + #change below variables for your cluster + CONFIG_FILE_TO_UPDATE="" + + #Used when for restarting components after config update + #Wait time before we poll for restart status. Default 30 seconds. Meaning, We poll for restart status every 30 seconds + WAIT_TIME_IN_SEC=30 + + #No of tries before we give up on the restart status. Default 20. With default WAIT_TIME_IN_SEC as 30, At max we wait for 10(20*30=600 seconds) minutes before we give up. + RETRY_COUNT=20 + + #INTERNAL USE ONLY + propObj="" + + get_apidetails + getUtilityNodesIps=$livyip + getlivyclustername + echo $getUtilityNodesIps + getClusterName=$cluster_name + for utilityNodeIp in $getUtilityNodesIps + do + echo "Current utility node ip: $utilityNodeIp" + str1=$(nslookup $utilityNodeIp | awk -v var=$utilityNodeIp '/name =/{print var "\t", $4}') + CONFIG_FILE_TO_UPDATE="core-site" #this is the file we're updating in this example + propObj=$(get_property_json) + echo $propObj + echo "calling add properties" + + #update key value pairs. 
Multiple key value pairs can be updated before doing update_ambari_config
+        add_properties "fs.oci.client.auth.fingerprint" $fingerprint
+        add_properties "fs.oci.client.auth.passphrase" $cluster_password
+        add_properties "fs.oci.client.auth.pemfilepath" $pemfilepath
+        add_properties "fs.oci.client.auth.tenantId" $tenid
+        add_properties "fs.oci.client.auth.userId" $usid
+        add_properties "fs.oci.client.regionCodeOrId" $region
+        #Update it to ambari
+        echo "updating ambari config"
+        update_ambari_config
+
+        echo "restarting all required components"
+        restart_required_components
+
+    done
+
+}
+
+
+#Method to collect the current config
+function get_property_json(){
+    allConfs=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName?fields=Clusters/desired_configs) #to get all the configs
+    currVersionLoc=".Clusters.desired_configs.\"$CONFIG_FILE_TO_UPDATE\".tag" #fetching current version for property
+    propVersion=$(echo $allConfs | jq $currVersionLoc | tr -d '"')
+    propJson=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X GET "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/configurations?type=$CONFIG_FILE_TO_UPDATE&tag=$propVersion") #fetch property json
+    propLoc=".items[].properties"
+    propKeyVal=$(echo $propJson | jq $propLoc)
+    propObj="{\"properties\":$propKeyVal}"
+    echo $propObj
+}
+
+#Method to add/update key value pair to existing config
+function add_properties(){
+    echo $1 $2
+    echo $propObj
+    propObj=$(echo $propObj | jq '.properties += { "'$1'": "'$2'" }')
+    echo $propObj
+}
+
+#Method to update config in ambari
+function update_ambari_config(){
+    parseableAddedProp=$(echo $propObj | jq '.properties')
+    echo $parseableAddedProp
+    timestamp=$(date +%s)
+    newVersion="version$timestamp"
+    finalJson='[{"Clusters":{"desired_config":[{"type":"'$CONFIG_FILE_TO_UPDATE'","tag":"'$newVersion'","properties":'$parseableAddedProp'}]}}]'
+    echo "CALLING AMBARI API"
+    response_body_amb=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X PUT -d "$finalJson" "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName")
+    echo $response_body_amb
+    echo "DONE AMBARI API"
+}
+
+#Method to restart required components
+function restart_required_components(){
+    echo "restarting all required components"
+    response_body=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X POST -d '{"RequestInfo":{"command":"RESTART","context":"Restart all required services from bootstrap script","operation_level":"host_component"},"Requests/resource_filters":[{"hosts_predicate":"HostRoles/stale_configs=true&HostRoles/cluster_name='$getClusterName'"}]}' "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests")
+
+    echo "printing response_body: $response_body"
+
+    idLoc=".Requests.id"
+    requestId=$(echo $response_body | jq $idLoc)
+    echo "request id is : $requestId"
+
+    current_count=0
+    while [[ $current_count -lt $RETRY_COUNT ]];
+    do
+        current_count=$((current_count+1))
+        response=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests/$requestId)
+        request_status=$(echo $response | jq -r ".Requests.request_status")
+        echo "printing request_status: $request_status"
+        if [[ $request_status == "IN_PROGRESS" ]] || [[ $request_status == "PENDING" ]]; then
+            echo "current_count is : $current_count"
+            sleep $WAIT_TIME_IN_SEC
+        elif [[ $request_status == "COMPLETED" ]]; then
+            echo "Restart successful"
+            break
+        fi
+    done
+}
+
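+# Registers an Object Storage API key for the cluster. Unlike the
+# create-cluster-with-sleep variant, this blocks on --wait-for-state
+# SUCCEEDED (up to $cwait seconds) instead of sleeping a fixed 5 minutes.
+function 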
create_api(){
+    export bds_instance_id=$bdsid
+    export key_alias=$key_alias # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-key-alias
+    export passphrase=$b64p # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-passphrase
+    export user_id=$user_id
+    capi='SUCCEEDED'
+    oci bds bds-api-key create --bds-instance-id $bds_instance_id --key-alias $key_alias --passphrase $passphrase --user-id $user_id --wait-for-state $capi --max-wait-seconds $cwait
+}
+
+function restart_bds_cluster() {
+    # oci cli command to stop
+    echo "STOPPING CLUSTER"
+    cstate='SUCCEEDED'
+    cwait=2000
+    oci bds instance stop --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --is-force-stop-jobs true --wait-for-state=$cstate --max-wait-seconds $cwait
+    # oci cli command to start
+    echo "STARTING CLUSTER"
+    cstate='SUCCEEDED'
+    oci bds instance start --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --wait-for-state=$cstate --max-wait-seconds $cwait
+}
+
+
+# Below is tenancy
+
+function create_cluster() {
+
+    export compartment_id=$compartment_id
+
+    master=1
+    utility=1
+
+    worker=$workernode # This has to be replaced with Jenkins Parameter
+
+    # Begin script in case all parameters are correct
+    echo "Generating json with $master master nodes $utility utility nodes and $worker worker nodes"
+    json="["
+
+    for i in `seq 1 $master`
+    do
+        json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }"
+    done
+
+    for i in `seq 1 $utility`
+    do
+        json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }"
+    done
+
+    for i in `seq 1 $worker`
+    do
+        json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 16, \"ocpus\": 3},\"subnetId\": \"$subnet\" }"
+    done
+
+    json="$json]"
+    printf "$json" > "nodes.json"
+    echo "File successfully generated and saved as nodes.json"
+
+    echo "TRIGGERED CREATING THE BDS CLUSTER"
+
+    export cluster_public_key=$public_key
+    export cluster_version="ODH2_0"
+    export display_name=$display_name
+    export is_high_availability='false'
+    export is_secure='false'
+    cmd="oci bds instance create --cluster-admin-password '$b64p' --cluster-public-key '$cluster_public_key' --cluster-version '$cluster_version' --compartment-id '$compartment_id' --display-name '$display_name' --is-high-availability $is_high_availability --is-secure $is_secure --wait-for-state $cstate --max-wait-seconds $cwait --nodes file://nodes.json "
+    #echo $cmd
+    create_response=$(eval "$cmd")
+    echo "CLUSTER CREATED SUCCESSFULLY"
+}
+
+function replace_host() {
+    echo "REPLACE THE HOSTS"
+    echo "" >> {{inventory_dir}}/hosts
+    echo "[bds-livy-node]" >> {{inventory_dir}}/hosts
+    echo "$livyip ansible_ssh_user=opc" >> {{inventory_dir}}/hosts
+    echo "" >> {{inventory_dir}}/hosts
+
+}
+
+# MAIN TO START
+
+b64p=`echo -n $cluster_password | base64`
+echo $b64p
+echo $compartment_id
+
+echo "CREATING BDS CLUSTER"
+
+create_cluster
+
+echo "FETCHING BDS ID"
+
+get_bdsid # This sets BDS ID
+
+echo "GET LIVY-AMBARI IP"
+
+getLivyip # This will be ambari ip also
+
+replace_host
+
+echo "CREATE OBJECT STORAGE API KEY"
+
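+# the key must reach SUCCEEDED before get_apidetails/update_bds_config can
+# copy its fingerprint and pem path into core-site below
+create_api
+
+echo "UPDATE BDS 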
AMBARI CONFIG" + +get_apidetails + +update_bds_config + +# restart_bds_cluster diff --git a/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 new file mode 100755 index 0000000000..b5e1d28d36 --- /dev/null +++ b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 @@ -0,0 +1,34 @@ +#!/bin/bash + +compartment_id="{{compartment_id}}" +display_name="{{display_name}}" + + +echo "DELETE STARTED" +echo $display_name +echo $compartment_id + + +function get_bdsid() { + list_param=`oci bds instance list --compartment-id $compartment_id` + bdsid="NULL" + # echo $list_param | jq '.data' + state="ACTIVE" + disname="NULL" + for k in $(jq '.data | keys | .[]' <<< "$list_param"); do + # echo $k + cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'` + if [ $cstate = $state ]; then + disname=`echo $list_param | jq -r '.data['$k']["display-name"]'` + if [ $disname = $display_name ]; then + bdsid=`echo $list_param | jq -r '.data['$k']["id"]'` + fi + + fi + echo $bdsid + done +} + +get_bdsid + +yes Y | oci bds instance delete --bds-instance-id $bdsid diff --git a/ansible/roles/provision-oci-spark-cluster/defaults/main.yml b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml new file mode 100644 index 0000000000..11e3e6357b --- /dev/null +++ b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml @@ -0,0 +1,39 @@ + +bucket: "telemetry-data-store" +model_version: "2.0" + +spark_folder: /usr/odh/2.0.1/spark + +# delete +guava_default_version: 14.0.1 +guava_default_jre_version_1: 26.0-jre +guava_default_jre_version_2: 27.0-jre +guice_default_version: 4.2.2 + +# add +guava_version: 19.0 +log4j_version: 2.16.0 +spark_redis_version: 2.5.0 +guava_jre_version: 24.1.1-jre +jedis_version: 3.2.0 +zip4j_version: 2.6.2 +guice_version: 3.0 + +jets3t_version: 0.9.4 +hadoop_aws_version: 2.7.3 +java_xmlbuilder_version: 1.1 + +guava_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_version}}/guava-{{guava_version}}.jar +guava_jre_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_jre_version}}/guava-{{guava_jre_version}}.jar +log4j_core_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-core/{{log4j_version}}/log4j-core-{{log4j_version}}.jar +log4j_api_url: https://repo1.maven.org/maven2/org/apache/logging/log4j/log4j-api/{{log4j_version}}/log4j-api-{{log4j_version}}.jar +spark_redis_url: https://repo1.maven.org/maven2/com/redislabs/spark-redis_2.12/{{spark_redis_version}}/spark-redis_2.12-{{spark_redis_version}}.jar +jedis_url: https://repo1.maven.org/maven2/redis/clients/jedis/{{jedis_version}}/jedis-{{jedis_version}}.jar +zip4j_url: https://repo1.maven.org/maven2/net/lingala/zip4j/zip4j/{{zip4j_version}}/zip4j-{{zip4j_version}}.jar +guice_url: https://repo1.maven.org/maven2/com/google/inject/guice/{{guice_version}}/guice-{{guice_version}}.jar +guice_servlet_url: https://repo1.maven.org/maven2/com/google/inject/extensions/guice-servlet/{{guice_version}}/guice-servlet-{{guice_version}}.jar + +jets3t_url: https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar +hadoop_aws_url: https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/{{hadoop_aws_version}}/hadoop-aws-{{hadoop_aws_version}}.jar +java_xmlbuilder_url: https://repo1.maven.org/maven2/com/jamesmurty/utils/java-xmlbuilder/{{java_xmlbuilder_version}}/java-xmlbuilder-{{java_xmlbuilder_version}}.jar + diff --git 
a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml new file mode 100644 index 0000000000..9df37915bd --- /dev/null +++ b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml @@ -0,0 +1,89 @@ +# - name: Adding azure blob variable to spark env file + # lineinfile: + # path: "{{spark_folder}}/conf/spark-env.sh" + # line: '{{item.var}}={{item.value}}' + # regexp: "{{ item.var }}.*" + # with_items: + # - {var: 'azure_storage_key', value: '{{ azure_private_storage_account_name }}'} + # - {var: 'azure_storage_secret', value: '{{ azure_private_storage_account_key }}'} + # no_log: true + # when: cloud_service_provider == "azure" + +- name: Remove guava-jre, guice default jars + become: yes + file: + path: "{{ spark_folder }}/jars/{{item.var}}-{{item.value}}.jar" + state: absent + with_items: + - {var: 'guava', value: '{{ guava_default_version }}'} + - {var: 'guava', value: '{{ guava_default_jre_version_1 }}'} + - {var: 'guava', value: '{{ guava_default_jre_version_2 }}'} + - {var: 'guice', value: '{{ guice_default_version }}'} + - {var: 'guice-servlet', value: '{{ guice_default_version }}'} + +- name: Download guava and copy to Spark jars folder + become: yes + get_url: url={{ guava_url }} dest={{ spark_folder }}/jars/guava-{{guava_version}}.jar timeout=1000 force=no + +- name: Download guava_jre_url and copy to Spark jars folder + become: yes + get_url: url={{ guava_jre_url }} dest={{ spark_folder }}/jars/guava-{{guava_jre_version}}.jar timeout=1000 force=no + +- name: Download log4j api and copy to Spark jars folder + become: yes + get_url: url={{ log4j_api_url }} dest={{ spark_folder }}/jars/log4j-api-{{log4j_version}}.jar timeout=1000 force=no + +- name: Download log4j core and copy to Spark jars folder + become: yes + get_url: url={{ log4j_core_url }} dest={{ spark_folder }}/jars/log4j-core-{{log4j_version}}.jar timeout=1000 force=no + +- name: Download spark-redis and copy to Spark jars folder + become: yes + get_url: url={{ spark_redis_url }} dest={{ spark_folder }}/jars/spark-redis_2.12-{{spark_redis_version}}.jar timeout=1000 force=no + +- name: Download jedis and copy to Spark jars folder + become: yes + get_url: url={{ jedis_url }} dest={{ spark_folder }}/jars/jedis-{{jedis_version}}.jar timeout=1000 force=no + +- name: Download zip4j and copy to Spark jars folder + become: yes + get_url: url={{ zip4j_url }} dest={{ spark_folder }}/jars/zip4j-{{zip4j_version}}.jar timeout=1000 force=no + +- name: Download guice and copy to Spark jars folder + become: yes + get_url: url={{ guice_url }} dest={{ spark_folder }}/jars/guice-{{guice_version}}.jar timeout=1000 force=no + +- name: Download guice-servlet and copy to Spark jars folder + become: yes + get_url: url={{ guice_servlet_url }} dest={{ spark_folder }}/jars/guice-servlet-{{guice_version}}.jar timeout=1000 force=no + +- name: Download jets3t and copy to Spark jars folder + become: yes + get_url: url={{ jets3t_url }} dest={{ spark_folder }}/jars/jets3t-{{jets3t_version}}.jar timeout=1000 force=no + +- name: Download hadoop_aws and copy to Spark jars folder + become: yes + get_url: url={{ hadoop_aws_url }} dest={{ spark_folder }}/jars/hadoop-aws-{{hadoop_aws_version}}.jar timeout=1000 force=no + +- name: Download java_xmlbuilder and copy to Spark jars folder + become: yes + get_url: url={{ java_xmlbuilder_url }} dest={{ spark_folder }}/jars/java-xmlbuilder-{{java_xmlbuilder_version}}.jar timeout=1000 force=no + + +- name: Download config to livy + command: hdfs dfs -get -f 
oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf {{ spark_folder }}/conf/application.conf
+
+
+
+- name: Update log4j.properties
+  become: yes
+  blockinfile:
+    path: "{{ spark_folder }}/conf/log4j.properties"
+    block: |
+      log4j.logger.org.ekstep.analytics=INFO, org.ekstep.analytics
+      log4j.appender.org.ekstep.analytics=org.apache.log4j.RollingFileAppender
+      log4j.appender.org.ekstep.analytics.File=./joblog.log
+      log4j.appender.org.ekstep.analytics.MaxFileSize=${log4jspark.log.maxfilesize}
+      log4j.appender.org.ekstep.analytics.MaxBackupIndex=${log4jspark.log.maxbackupindex}
+      log4j.appender.org.ekstep.analytics.layout=org.apache.log4j.PatternLayout
+      log4j.appender.org.ekstep.analytics.layout.ConversionPattern=%d{HH:mm:ss.SSS} [%t] %-5p %c{1} - %m%n
diff --git a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile
index 9749d35b36..926c773ff0 100644
--- a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile
+++ b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile
@@ -26,7 +26,7 @@ node() {
         jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim()
         currentWs = sh(returnStdout: true, script: 'pwd').trim()
         ansiblePlaybook = "${currentWs}/ansible/spark-cluster-job-submit.yml"
-        ansibleExtraArgs = "--tags ${params.job_type} --extra-vars \"job_id=${params.job_id} mode=${params.mode} partitions=${params.partitions} parallelisation=${params.parallelisation} start_date=${params.start_date} end_date=${params.end_date} batch_id=${params.batch_identifier} sparkMaster=${params.sparkMaster} pause_min=${params.pause_min} selected_partitions=${params.selected_partitions}\" --vault-password-file /var/lib/jenkins/secrets/vault-pass -vvvv "
+        ansibleExtraArgs = "--tags ${params.job_type} --extra-vars \"job_id=${params.job_id} mode=${params.mode} partitions=${params.partitions} parallelisation=${params.parallelisation} start_date=${params.start_date} end_date=${params.end_date} batch_id=${params.batch_identifier} sparkMaster=${params.sparkMaster} pause_min=${params.pause_min} vcn_name=${params.vcn_name} bds_cluster_name=${params.bds_cluster_name} selected_partitions=${params.selected_partitions}\" --vault-password-file /var/lib/jenkins/secrets/vault-pass -vvvv "
         values.put('currentWs', currentWs)
         values.put('env', envDir)
         values.put('module', module)
diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds
new file mode 100644
index 0000000000..ae7e33e271
--- /dev/null
+++ b/pipelines/provision/spark/Jenkinsfile.bds
@@ -0,0 +1,57 @@
+
+@Library('deploy-conf') _
+node('build-slave') {
+    try {
+        String ANSI_GREEN = "\u001B[32m"
+        String ANSI_NORMAL = "\u001B[0m"
+        String ANSI_BOLD = "\u001B[1m"
+        String ANSI_RED = "\u001B[31m"
+        String ANSI_YELLOW = "\u001B[33m"
+
+        ansiColor('xterm') {
+            stage('Checkout') {
+                checkout scm
+            }
+
+            stage('copy cluster creation script') {
+                values = [:]
+                envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim()
+                module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim()
+                jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim()
+                currentWs = sh(returnStdout: true, script: 'pwd').trim()
+                ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml"
+                ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} 
subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass"
+                values.put('currentWs', currentWs)
+                values.put('env', envDir)
+                values.put('module', module)
+                values.put('jobName', jobName)
+                values.put('ansiblePlaybook', ansiblePlaybook)
+                values.put('ansibleExtraArgs', ansibleExtraArgs)
+                println values
+                ansible_playbook_run(values)
+            }
+            stage('create and provision spark OCI BDS') {
+                  oci_namespace=params.oci_namespace
+                  //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) {
+                      sh '''
+                      currentws=$(pwd)
+                      ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml"
+                      cd /tmp
+                      ./create-cluster.sh
+                      export inventory_dir=/var/lib/jenkins/workspace/Provision/dev/DataPipeline/__SparkBDSCluster3/ansible/inventory/env
+
+                      export ANSIBLE_HOST_KEY_CHECKING=False
+                      ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass
+                      '''
+                  //}
+
+            }
+
+        }
+    }
+    catch (err) {
+        currentBuild.result = "FAILURE"
+        throw err
+    }
+
+}
diff --git a/pipelines/provision/spark/Jenkinsfile.bds.test b/pipelines/provision/spark/Jenkinsfile.bds.test
new file mode 100644
index 0000000000..bd6de3ad34
--- /dev/null
+++ b/pipelines/provision/spark/Jenkinsfile.bds.test
@@ -0,0 +1,60 @@
+
+@Library('deploy-conf') _
+node('build-slave') {
+    try {
+        String ANSI_GREEN = "\u001B[32m"
+        String ANSI_NORMAL = "\u001B[0m"
+        String ANSI_BOLD = "\u001B[1m"
+        String ANSI_RED = "\u001B[31m"
+        String ANSI_YELLOW = "\u001B[33m"
+
+        ansiColor('xterm') {
+            stage('Checkout') {
+                checkout scm
+            }
+
+            stage('copy cluster creation script') {
+                values = [:]
+                envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim()
+                module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim()
+                jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim()
+                currentWs = sh(returnStdout: true, script: 'pwd').trim()
+                ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml"
+                ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.type}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass"
+                values.put('currentWs', currentWs)
+                values.put('env', envDir)
+                values.put('module', module)
+                values.put('jobName', jobName)
+                values.put('ansiblePlaybook', ansiblePlaybook)
+                values.put('ansibleExtraArgs', ansibleExtraArgs)
+                println values
+                ansible_playbook_run(values)
+            }
+            stage('create and provision spark OCI BDS') {
+                oci_namespace=params.oci_namespace
+                bds_livy_node_ip=params.bds_livy_node_ip
+                sh '''
+                      currentws=$(pwd)
+                      ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml"
+                      cd /tmp
+                      #./create_cluster_bds.sh
+
+                      export inventory_dir=/var/lib/jenkins/workspace/Provision/dev/DataPipeline/__SparkBDSCluster3/ansible/inventory/env
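+                      # test variant: the utility-node IP is appended to the
+                      # inventory by hand here; the main pipeline lets
+                      # create-cluster.sh do this via its replace_host function
+                      echo "" >> $inventory_dir/hosts
+                      echo "[bds-livy-node]" >> $inventory_dir/hosts
+                      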
echo "$bds-livy-node-ip ansible_ssh_user=opc" >> $inventory_dir/hosts + echo "" >> $inventory_dir/hosts + + ANSIBLE_HOST_KEY_CHECKING=False + ansible-playbook -i $currentws/ansible/inventory/env/hosts $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + ''' + } + + } + } + catch (err) { + currentBuild.result = "FAILURE" + throw err + } + +} diff --git a/pipelines/provision/spark/Jenkinsfile.delete b/pipelines/provision/spark/Jenkinsfile.delete index 93aed171cb..5675a7e1df 100644 --- a/pipelines/provision/spark/Jenkinsfile.delete +++ b/pipelines/provision/spark/Jenkinsfile.delete @@ -1,3 +1,4 @@ + @Library('deploy-conf') _ node('build-slave') { try { @@ -18,8 +19,8 @@ node('build-slave') { module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() - ansiblePlaybook = "${currentWs}/ansible/azure-hdinsight-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"azure_resource_group=${params.resource_group} subscription_id=${env.subscription_id} tenant_id=${env.tenant_id} cluster_state=${params.type}\" --tags copy-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) @@ -29,15 +30,16 @@ node('build-slave') { println values ansible_playbook_run(values) } - stage('delete spark HDinsight cluster') { + stage('create and provision spark OCI BDS') { storage_container=params.storage_container - withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { + //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" cd /tmp - ./delete-cluster.sh $spuser $sppass + ./delete-cluster.sh ''' - } - + //} } From 84b1b8572c863b1c3915e8527a6f2fa4e3fd13c6 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 7 Apr 2023 15:41:56 +1000 Subject: [PATCH 086/203] v2 base changes Signed-off-by: Deepak Devadathan --- .../defaults/main.yml | 282 ++++++++ .../collection-summary-ingestion-spec.json | 251 +++++++ .../files/sourcing-ingestion-spec.json | 146 ++++ .../tasks/main.yml | 499 +++++++++++++ .../templates/cluster-config.json.j2 | 86 +++ .../templates/common.conf.j2 | 317 +++++++++ .../templates/exhaust_sanity_check.py.j2 | 58 ++ .../templates/log4j2.xml.j2 | 54 ++ .../templates/model-config.j2 | 151 ++++ .../templates/model-config.json.j2 | 670 ++++++++++++++++++ .../templates/model-dock-config.j2 | 34 + .../templates/replay-job.j2 | 63 ++ .../templates/replay-updater.j2 | 24 + .../templates/replay-utils.j2 | 43 ++ .../templates/run-dock-job.j2 | 41 ++ .../templates/run-job.j2 | 
83 +++ .../templates/start-jobmanager.j2 | 46 ++ .../templates/submit-all-jobs.rb.j2 | 58 ++ .../templates/submit-job.j2 | 22 + .../templates/submit-script.j2 | 216 ++++++ .../templates/update-job-requests.py.j2 | 119 ++++ .../data-products-deploy/defaults/main.yml | 8 +- .../roles/data-products-deploy/tasks/main.yml | 14 +- 23 files changed, 3274 insertions(+), 11 deletions(-) create mode 100755 ansible/roles/data-products-deploy-oci-bds/defaults/main.yml create mode 100644 ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json create mode 100644 ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json create mode 100644 ansible/roles/data-products-deploy-oci-bds/tasks/main.yml create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 create mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 diff --git a/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml b/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml new file mode 100755 index 0000000000..7eb22c7a18 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml @@ -0,0 +1,282 @@ +analytics_user: analytics +analytics_group: analytics +spark_output_temp_dir: /mount/data/analytics/tmp/ + +bucket: "telemetry-data-store" +secor_bucket: "telemetry-data-store" +dp_object_store_type: "oci" +dp_raw_telemetry_backup_location: "unique/raw/" +dp_storage_key_config: "azure_storage_key" +dp_storage_secret_config: "azure_storage_secret" +dp_reports_storage_key_config: "reports_azure_storage_key" +dp_reports_storage_secret_config: "reports_azure_storage_secret" + +kafka_broker_host: "{{groups['processing-cluster-kafka'][0]}}:9092" +ingestion_kafka_broker_host: "{{groups['ingestion-cluster-kafka'][0]}}:9092" +brokerlist: "{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" +zookeeper: "{{groups['processing-cluster-zookeepers']|join(':2181,')}}:2181" +dp_username: dp-monitor +analytics_job_queue_topic: "{{ env }}.analytics.job_queue" +topic: "{{ env }}.telemetry.derived" +analytics_metrics_topic: "{{ env }}.analytics_metrics" +sink_topic: "{{ env 
}}.telemetry.sink" +assess_topic: "{{ env }}.telemetry.assess" +metrics_topic: "{{ env }}.telemetry.metrics" +job_manager_tmp_dir: "transient-data" +channel: dev-test +druid_broker_host: "{{groups['raw-broker'][0]}}" +druid_router_host: "{{groups['raw-router'][0]}}" +druid_rollup_broker_host: "{{groups['raw-broker'][0]}}" +hierarchySearchServiceUrl: "{{ proto }}://{{ domain_name }}/action/content" +hierarchySearchServicEndpoint: /v3/hierarchy/ + +user_table_keyspace: "sunbird" +course_keyspace: "sunbird_courses" +hierarchy_store_keyspace: "{{ env }}_hierarchy_store" +job_request_table: "{{ env }}_job_request" +dataset_metadata_table: "{{ env }}_dataset_metadata" +report_user_table_keyspace: "sunbird_courses" +report_user_enrolment_table: "report_user_enrolments" + +analytics_job_list: '"wfs", "content-rating-updater", "monitor-job-summ"' +analytics_jobs_count: 3 + +cassandra_keyspace_prefix: '{{ env }}_' +report_cassandra_cluster_host: "{{ report_cassandra_host | default(core_cassandra_host) }}" +cassandra_hierarchy_store_keyspace: "{{ env_name}}_hierarchy_store" +spark_version: 3.1.3 + +heap_memory: "-Xmx5120m" + +spark: + home: "{{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7" + public_dns: 54.255.154.146 + master: + url: spark://172.31.11.117:7077 + host: 172.31.11.117 + worker: + instances: 1 + cores: 2 + memory: 4g + driver: + memory: 3g + executor: + memory: 4g + driver_memory: 7g + memory_fraction: 0.3 + storage_fraction: 0.5 + executor_memory: 2g + heap_conf_str: '"-XX:+UseG1GC -XX:MaxGCPauseMillis=100 -Xms250m {{ heap_memory }} -XX:+UseStringDeduplication"' + +submit_jobs: + submit-all-jobs: + hour: 02 + minute: 35 + +start_jobmanager: + job-manager: + hour: 02 + minute: 30 +have_weekly_jobs: false + +course_batch_status_updater_job_schedule: 60 + +run_wfs_job: + wfs: + hour: 00 + minute: 30 +run_monitor_job: + monitor-job-summ: + hour: 03 + minute: 00 + +run_admin_user_reports_job: + admin-user-reports-3AMIST: + hour: 21 + minute: 30 + admin-user-reports-2PMIST: + hour: 8 + minute: 30 + +run_admin_geo_reports_job: + admin-geo-reports-4AMIST: + hour: 22 + minute: 30 + admin-geo-reports-3PMIST: + hour: 9 + minute: 30 + +run_assessment_aggregator_report_job: + assessment-aggregator-report: + hour: 18 + minute: 35 + +update_user_redis_cache: + populate-user-cache: + hour: 3 + minute: 00 + +index_content_model_druid: + index_content: + hour: 1 + minute: 00 + +run_etb_metrics_weekly_job: + etb-metrics-weekly: + hour: 23 + minute: 30 + weekday: 1 + +# These are the dummy times till sept30 for exhaust reports +#To-Do: Update time after 3.2.7 deployment + +run_progress_exhaust: + progress-exhaust: + hour: 08 + minute: 00 + +run_response_exhaust: + response-exhaust: + hour: 09 + minute: 00 + +run_userinfo_exhaust: + userinfo-exhaust: + hour: 10 + minute: 00 + +run_collection_summary: + collection-summary: + hour: 09 + minute: 30 + +run_sourcing_summary: + sourcing-summary: + hour: 10 + minute: 30 + +run_cassandra_migration: + cassandra-migration: + hour: 19 + minute: 15 + +run_uci_private_exhaust_job: + uci-private-exhaust: + hour: 03 + minute: 00 + +run_uci_response_exhaust_job: + uci-response-exhaust: + hour: 02 + minute: 00 + + +service: + search: + url: http://{{private_ingressgateway_ip}}/search + path: /v3/search + +es_search_index: "compositesearch" +analytics: + home: /mount/data/analytics + soft_path: /mount/data/analytics + paths: ['/mount/data/analytics', '/mount/data/analytics/logs', '/mount/data/analytics/logs/services', '/mount/data/analytics/logs/data-products', 
'/mount/data/analytics/tmp', '/mount/data/analytics/scripts', '/mount/data/analytics/models' ] + scripts: ['model-config', 'replay-job', 'replay-updater', 'replay-utils', 'run-job', 'submit-job', 'start-jobmanager', 'submit-script'] + dockScripts: ['model-dock-config','run-dock-job'] + +# artifact versions +analytics_core_artifact_ver: "2.0" +analytics_ed_dataporducts_artifact_ver: "1.0" +scruid_artifact_ver: "2.5.0" + +producer_env: "dev.sunbird" +analytics_job_manager_artifact: "job-manager-{{ analytics_core_artifact_ver }}.jar" +analytics_core_artifact: "analytics-framework-{{ analytics_core_artifact_ver }}.jar" +scruid_artifact: "scruid_2.12-{{ scruid_artifact_ver }}.jar" +analytics_batch_module_artifact: "batch-models-{{ analytics_core_artifact_ver }}.jar" +analytics_ed_dataporducts_artifact: "data-products-{{ analytics_ed_dataporducts_artifact_ver }}-distribution.tar.gz" +model_version: "2.0" + +submit_jobs_auth_token: "{{ sunbird_api_auth_token }}" +report_list_jobs_url: "{{ druid_report_url }}" + +reports_container: "reports" + +# Cluster vars +spark_cluster_user_password: "" +spark_cluster_user_name: "" +admin_name: "{{ spark_cluster_user_name }}" +admin_password: "{{ spark_cluster_user_password }}" +spark_cluster_name: "{{env}}-spark-cluster" + +spark_cluster: + executor_core: 1 + executor_memory: 2G + num_executors: 1 + +analytics_cluster: + home: "/tmp" + +analytics_ed_dataporducts_jar_artifact: "data-products-{{ analytics_ed_dataporducts_artifact_ver }}.jar" + +spark_enable_dynamic_allocation: false +# Spark Cassandra config-vars +spark_cassandra_connection_timeout_millis: 30000 +spark_cassandra_query_timeout_millis: 180000 +spark_cassandra_query_max_rows_fetch_count: 1000 +spark_sql_shuffle_partitions: 200 + +druid_report_postgres_db_name: druid +druid_report_postgres_db_username: druid + + +#Override this variable in production and point to druid rollup ingestion cluster +# Example: "http://$rollup_cluster_ip:8090" +druid_rollup_cluster_ingestion_task_url: "http://{{groups['raw-overlord'][0]}}:8081" + +# On demand Exhaust throttling vars +exhaust_batches_limit_per_channel: 30 +exhaust_file_size_limit_bytes_per_channel: 1073741824 + +exhaust_parallel_batch_load_limit: 10 +exhaust_user_parallelism: 200 + +data_exhaust_batch_limit_per_request: 20 + +# Start Of UCI Related Variables +uci_postgres_host: "dev-pg11.postgres.database.azure.com" +uci_encryption_key_base64: "" +uci_bot_postgres_database: uci-botdb +uci_fusionauth_postgres_database: uci-fusionauth +uci_postgres_user: "{{postgres.db_username}}" +uci_postgres_password: "{{postgres.db_password}}" + +uci_postgres: + conversation_db_name: "{{ uci_bot_postgres_database }}" + conversation_db_host: "{{ uci_postgres_host }}" + conversation_db_port: "5432" + conversation_db_user: "{{ uci_postgres_user }}" + conversation_db_psss: "{{ uci_postgres_password }}" + conversation_table_name: "bot" + fushionauth_db_name: "{{ uci_fusionauth_postgres_database }}" + fushionauth_db_host: "{{ uci_postgres_host }}" + fushionauth_db_port: "5432" + fushionauth_db_user: "{{ uci_postgres_user }}" + fushionauth_db_psss: "{{ uci_postgres_password }}" + user_table_name: "users" + user_registration_table_name: "user_registrations" + user_identities_table_name: "identities" + +uci_encryption_secret_key: "{{uci_encryption_key_base64}}" +uci_pdata_id: "{{uci_env}}.uci.{{sunbird_instance}}" + +# End Of UCI Related Variables + +# Exhaust sanity check vars +cassandra_migrator_job_name: "Cassandra Migrator" + +assessment_metric_primary_category: "{{ 
exhaust_job_assessment_primary_category }}" + +# Default s3 variables +sunbird_private_s3_storage_key: "" +sunbird_private_s3_storage_secret: "" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json b/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json new file mode 100644 index 0000000000..69e13196e2 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json @@ -0,0 +1,251 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "collection-summary-snapshot", + "parser": { + "type": "string", + "parseSpec": { + "format": "json", + "flattenSpec": { + "useFieldDiscovery": false, + "fields": [ + { + "type": "root", + "name": "content_org", + "expr": "contentorg" + }, + { + "type": "root", + "name": "user_org", + "expr": "orgname" + }, + { + "type": "root", + "name": "batch_start_date", + "expr": "startdate" + }, + { + "type": "root", + "name": "batch_end_date", + "expr": "enddate" + }, + { + "type": "root", + "name": "has_certificate", + "expr": "hascertified" + }, + { + "type": "root", + "name": "collection_id", + "expr": "courseid" + }, + { + "type": "root", + "name": "batch_id", + "expr": "batchid" + }, + { + "type": "root", + "name": "collection_name", + "expr": "collectionname" + }, + { + "type": "root", + "name": "batch_name", + "expr": "batchname" + }, + { + "type": "root", + "name": "total_enrolment", + "expr": "enrolleduserscount" + }, + { + "type": "root", + "name": "total_completion", + "expr": "completionuserscount" + }, + { + "type": "root", + "name": "total_certificates_issued", + "expr": "certificateissuedcount" + }, + { + "type": "root", + "name": "content_status", + "expr": "contentstatus" + }, + { + "type": "root", + "name": "user_state", + "expr": "state" + }, + { + "type": "root", + "name": "user_district", + "expr": "district" + }, + { + "type": "root", + "name": "content_channel", + "expr": "channel" + }, + { + "type": "root", + "name": "keywords", + "expr": "keywords" + }, + { + "type": "root", + "name": "timestamp", + "expr": "timestamp" + }, + { + "type": "root", + "name": "medium", + "expr": "medium" + }, + { + "type": "root", + "name": "subject", + "expr": "subject" + }, + { + "type": "root", + "name": "created_for", + "expr": "createdfor" + }, + { + "type": "root", + "name": "user_type", + "expr": "usertype" + }, + { + "type": "root", + "name": "user_subtype", + "expr": "usersubtype" + } + ] + }, + "dimensionsSpec": { + "dimensions": [ + { + "name": "content_org" + }, + { + "name": "user_org" + }, + { + "type": "string", + "name": "batch_id" + }, + { + "type": "string", + "name": "batch_start_date" + }, + { + "type": "string", + "name": "batch_end_date" + }, + { + "type": "string", + "name": "collection_id" + }, + { + "type": "string", + "name": "collection_name" + }, + { + "type": "string", + "name": "batch_name" + }, + { + "type": "long", + "name": "total_enrolment" + }, + { + "type": "long", + "name": "total_completion" + }, + { + "type": "long", + "name": "total_certificates_issued" + }, + { + "type": "string", + "name": "content_status" + }, + { + "type": "string", + "name": "user_state" + }, + { + "type": "string", + "name": "user_district" + }, + { + "name": "keywords" + }, + { + "name": "has_certificate" + }, + { + "type": "string", + "name": "content_channel" + }, + { + "name": "medium" + }, + { + "name": "subject" + }, + { + "name": "created_for" + }, + { + "type": "string", + 
"name": "user_type" + }, + { + "type": "string", + "name": "user_subtype" + } + ], + "dimensionsExclusions": [] + }, + "timestampSpec": { + "column": "timestamp", + "format": "auto" + } + } + }, + "metricsSpec": [], + "granularitySpec": { + "type": "uniform", + "segmentGranularity": "day", + "queryGranularity": "none", + "rollup": true + } + }, + "ioConfig": { + "type": "index", + "firehose": { + "type": "static-azure-blobstore", + "blobs": [ + { + "container": "reports", + "path": "/collection-summary-reports-v2/collection-summary-report-latest.json" + } + ], + "fetchTimeout": 300000 + } + }, + "tuningConfig": { + "type": "index", + "targetPartitionSize": 5000000, + "maxRowsInMemory": 25000, + "forceExtendableShardSpecs": false, + "logParseExceptions": true + } + } +} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json b/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json new file mode 100644 index 0000000000..69e773d457 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json @@ -0,0 +1,146 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "sourcing-summary-snapshot", + "parser": { + "type": "string", + "parseSpec": { + "format": "json", + "flattenSpec": { + "useFieldDiscovery": false, + "fields": [ + { + "type": "root", + "name": "program_id", + "expr": "program_id" + }, + { + "type": "root", + "name": "status", + "expr": "status" + }, + { + "type": "root", + "name": "rootorg_id", + "expr": "rootorg_id" + }, + { + "type": "root", + "name": "user_id", + "expr": "user_id" + }, + { + "type": "root", + "name": "osid", + "expr": "osid" + }, + { + "type": "root", + "name": "user_type", + "expr": "user_type" + }, + { + "type": "root", + "name": "contributor_id", + "expr": "contributor_id" + }, + { + "type": "root", + "name": "total_contributed_content", + "expr": "total_contributed_content" + }, + { + "type": "root", + "name": "primary_category", + "expr": "primary_category" + }, + { + "type": "root", + "name": "created_by", + "expr": "created_by" + } + ] + }, + "dimensionsSpec": { + "dimensions": [ + { + "type": "string", + "name": "program_id" + }, + { + "type": "string", + "name": "status" + }, + { + "type": "string", + "name": "rootorg_id" + }, + { + "type": "string", + "name": "user_id" + }, + { + "type": "string", + "name": "osid" + }, + { + "type": "string", + "name": "user_type" + }, + { + "type": "string", + "name": "contributor_id" + }, + { + "type": "string", + "name": "primary_category" + }, + { + "type": "string", + "name": "created_by" + } + ], + "dimensionsExclusions": [] + }, + "timestampSpec": { + "column": "timestamp", + "format": "auto" + } + } + }, + "metricsSpec": [ + { + "name": "total_count", + "type": "count" + } + ], + "granularitySpec": { + "type": "uniform", + "segmentGranularity": "day", + "queryGranularity": "none", + "rollup": true + } + }, + "ioConfig": { + "type": "index", + "firehose": { + "type": "static-azure-blobstore", + "blobs": [ + { + "container": "reports", + "path": "/sourcing/SourcingSummaryReport.json" + } + ], + "fetchTimeout": 300000 + } + }, + "tuningConfig": { + "type": "index", + "targetPartitionSize": 5000000, + "maxRowsInMemory": 25000, + "forceExtendableShardSpecs": false, + "logParseExceptions": true + } + } +} diff --git a/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml b/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml new file mode 100644 index 0000000000..733c416138 
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml
@@ -0,0 +1,499 @@
+## Data products deployment ##
+- name: Ensure azure blob storage container exists
+  command: az storage container create --name {{ bucket }}
+  when: dp_object_store_type == "azure"
+  tags:
+    - always
+
+- name: Ensure oci oss bucket exists
+  command: "oci os bucket get --name {{ bucket }}"
+  register: check_bucket
+  # A missing bucket must not abort the play; the next task creates it
+  # based on check_bucket.rc.
+  ignore_errors: true
+  when: dp_object_store_type == "oci"
+  tags:
+    - always
+
+- name: Create oci oss bucket
+  command: "oci os bucket create -c {{ oci_bucket_compartment }} --name {{ bucket }}"
+  when: dp_object_store_type == "oci" and check_bucket.rc != 0
+  tags:
+    - always
+
+- name: Copy Core Data Products
+  copy: src={{ analytics_batch_module_artifact }} dest={{ analytics.home }}/models-{{ model_version }}
+  tags:
+    - dataproducts
+
+- name: Copy Core Data Products to azure blob
+  command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "azure"
+  tags:
+    - dataproducts-spark-cluster
+
+- name: Copy Core Data Products to oci oss
+  command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "oci"
+  tags:
+    - dataproducts-spark-cluster
+
+- name: Unarchive Ed Data Products
+  become: yes
+  unarchive: src={{ playbook_dir }}/{{ analytics_ed_dataporducts_artifact }} dest={{ analytics.home }}/models-{{ model_version }} copy=yes group={{ analytics_group }} owner={{ analytics_user }}
+  tags:
+    - ed-dataproducts
+
+- name: Copy Ed Data Products to azure blob
+  command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar -f {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "azure"
+  tags:
+    - ed-dataproducts-spark-cluster
+
+- name: Copy Ed Data Products to oci oss
+  command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "oci"
+  tags:
+    - ed-dataproducts-spark-cluster
+
+- name: Copy Framework Library
+  copy: src={{ analytics_core_artifact }} dest={{ analytics.home }}/models-{{ model_version }}
+  tags:
+    - framework
+
+- name: Copy Framework Library to azure blob
+  command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }}
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "azure"
+  tags:
+    - framework-spark-cluster
+
+- name: Copy Framework Library to oci oss
+  command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force
+  async: 3600
+  poll: 10
+  when: dp_object_store_type == "oci"
+  tags:
+    - framework-spark-cluster
+
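+# Note: the object-store upload pairs above are idempotent:
+# `az storage blob upload --overwrite` and `oci os object put --force`
+# both replace an existing object, so re-running the deploy refreshes the
+# artifact rather than failing on a pre-existing one.
+- name: Copy Scruid Library
+  copy: src={{ scruid_artifact }} dest={{ analytics.home }}/models-{{ model_version }}
+  tags:
+    - 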
framework + +- name: Copy Scruid Library to azure blob + command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} + async: 3600 + poll: 10 + when: dp_object_store_type == "azure" + tags: + - framework-spark-cluster + +- name: Copy Scruid Library to oci oss + command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force + async: 3600 + poll: 10 + when: dp_object_store_type == "oci" + tags: + - framework-spark-cluster + +- name: Copy Job Manager + copy: src={{ analytics_job_manager_artifact }} dest={{ analytics.home }}/models-{{ model_version }} + tags: + - dataproducts + +- name: Copy configuration file + template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/{{ env }}.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - ed-dataproducts + - framework + when: dockdataproducts is undefined + +- name: Copy configuration file + template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/dock-{{ env }}.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - ed-dataproducts + - framework + when: dockdataproducts is defined + +- name: Copy configuration file as application.conf for cluster + template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/application.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - framework-spark-cluster + +- name: Update spark temp dir value for cluster + lineinfile: + path: '{{ analytics.home }}/models-{{ model_version }}/application.conf' + regexp: '^spark_output_temp_dir="/mount/data/analytics/tmp/"' + line: 'spark_output_temp_dir="/var/log/sparkapp/tmp/"' + tags: + - framework-spark-cluster + +- name: Update logger kafka config for cluster + lineinfile: + path: '{{ analytics.home }}/models-{{ model_version }}/application.conf' + regexp: '^log.appender.kafka.enable="false"' + line: 'log.appender.kafka.enable="true"' + tags: + - framework-spark-cluster + +- name: Copy configuration file to azure blob + command: az storage blob upload --overwrite -c {{ bucket }} -f {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf + async: 3600 + poll: 10 + when: dp_object_store_type == "azure" + tags: + - framework-spark-cluster + +- name: Copy configuration file to oci oss + command: oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force + async: 3600 + poll: 10 + when: dp_object_store_type == "oci" + tags: + - framework-spark-cluster + +- name: Copy log4j2 xml file + template: src=log4j2.xml.j2 dest={{ analytics.home }}/models-{{ model_version }}/log4j2.xml mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: [ dataproducts, framework, ed-dataproducts ] + +- name: Copy Scripts + template: src={{ item }}.j2 dest={{ analytics.home }}/scripts/{{ item }}.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} + with_items: "{{ analytics.scripts }}" + tags: [ dataproducts, framework, ed-dataproducts ] + when: dockdataproducts is undefined + +- name: Copy python sanity check script file + template: src=exhaust_sanity_check.py.j2 dest={{ 
analytics.home }}/scripts/exhaust_sanity_check.py + tags: [ dataproducts, framework, ed-dataproducts ] + when: dockdataproducts is undefined + +- name: Copy Dock Scripts + template: src={{ item }}.j2 dest={{ analytics.home }}/scripts/{{ item }}.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} + with_items: "{{ analytics.dockScripts }}" + tags: [ dataproducts, framework, ed-dataproducts ] + when: dockdataproducts is defined + +- name: Update model config + template: src=model-config.j2 dest={{ analytics.home }}/scripts/model-config.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - update-config + - ed-dataproducts + when: dockdataproducts is undefined + +- name: Update model dock config + template: src=model-dock-config.j2 dest={{ analytics.home }}/scripts/model-dock-config.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - update-config + - ed-dataproducts + when: dockdataproducts is defined + +- name: Copy submit-all-jobs ruby file + template: src=submit-all-jobs.rb.j2 dest={{ analytics.home }}/scripts/submit-all-jobs.rb mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - update-config + - ed-dataproducts + +- name: Copy model-config.json file + template: src=model-config.json.j2 dest={{ analytics.home }}/scripts/model-config.json mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - dataproducts + - update-config + - ed-dataproducts + +- name: Clean cron jobs + command: crontab -r + ignore_errors: yes + tags: + - default-jobs + - spark-jobs + - spark1-jobs + - clean-cronjobs + +- name: Create daily cron jobs for wfs + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh wfs" + with_dict: "{{ run_wfs_job }}" + tags: + - spark1-jobs + +- name: Create daily cron jobs for monitor job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh monitor-job-summ" + with_dict: "{{ run_monitor_job }}" + tags: + - spark1-jobs + +- name: Create daily cron jobs using submit-all-jobs + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job='/bin/bash -lc "ruby {{ analytics.home }}/scripts/submit-all-jobs.rb"' + with_dict: "{{ submit_jobs }}" + tags: + - default-jobs + - spark-jobs + - cronjobs + +- name: Create start-jobmanager cron jobs + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/start-jobmanager.sh" + with_dict: "{{ start_jobmanager }}" + tags: + - default-jobs + - spark-jobs + - cronjobs + +- name: Create course-batch-status-updater cron job + cron: name="{{env}}-course-batch-status-updater" minute=*/{{ course_batch_status_updater_job_schedule }} job="{{ analytics.home }}/scripts/run-job.sh course-batch-status-updater" + tags: + - cronjobs + - default-jobs + - spark1-jobs + +- name: Create admin-user-reports cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh admin-user-reports" + with_dict: "{{ run_admin_user_reports_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs +- name: Create admin-geo-reports cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home 
}}/scripts/run-job.sh admin-geo-reports" + with_dict: "{{ run_admin_geo_reports_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create assessment-aggregator reports cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="/bin/bash {{ analytics.home }}/adhoc-scripts/run_exporter.sh > /home/analytics/output.log" + with_dict: "{{ run_assessment_aggregator_report_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create etb metrics cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} weekday={{ item.value.weekday }} job="{{ analytics.home }}/scripts/run-job.sh etb-metrics" + with_dict: "{{ run_etb_metrics_weekly_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create progress-exhaust cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh progress-exhaust" + with_dict: "{{ run_progress_exhaust }}" + tags: + - cronjobs + - default-jobs + - spark1-jobs + +- name: Create response-exhaust cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh response-exhaust" + with_dict: "{{ run_response_exhaust }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create cassandra-migration cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh cassandra-migration" + with_dict: "{{ run_cassandra_migration }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + + +- name: Create userinfo-exhaust cron job + cron: name="{{ env }}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh userinfo-exhaust" + with_dict: "{{ run_userinfo_exhaust }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create collection-summary cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh collection-summary-report" + with_dict: "{{ run_collection_summary }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Copy collection-summary ingestion spec + copy: src="collection-summary-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + tags: + - ed-dataproducts + +- name: Create sourcing-summary cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-dock-job.sh sourcing-summary-report" + with_dict: "{{ run_sourcing_summary }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create uci-private-exhaust cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh uci-private-exhaust" + with_dict: "{{ run_uci_private_exhaust_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Create uci-response-exhaust cron job + cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh uci-response-exhaust" + with_dict: "{{ run_uci_response_exhaust_job }}" + tags: + - cronjobs + - default-jobs + - spark-jobs + +- name: Copy sourcing-summary ingestion 
spec
+  copy: src="sourcing-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }}
+  tags:
+    - ed-dataproducts
+
+- name: Update start jobmanager
+  template: src=start-jobmanager.j2 dest={{ analytics.home }}/scripts/start-jobmanager.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }}
+  tags:
+    - update-jobmanager-config
+    - dataproducts
+
+# Cluster job submit tasks
+- name: Copy cluster-config.json file
+  template: src=cluster-config.json.j2 dest={{ analytics_cluster.home }}/cluster-config.json
+  delegate_to: localhost
+  tags:
+    - replay-job
+    - run-job
+    - config-update
+
+- name: Copy submit-script.sh file
+  template: src=submit-script.j2 dest={{ analytics_cluster.home }}/submit-script.sh mode=755
+  delegate_to: localhost
+  tags:
+    - replay-job
+    - run-job
+    - config-update
+
+- name: Copy model-config.sh file
+  template: src=model-config.j2 dest={{ analytics_cluster.home }}/model-config.sh
+  delegate_to: localhost
+  tags:
+    - replay-job
+    - run-job
+    - config-update
+
+- name: Replay Job
+  shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --startDate {{ start_date }} --endDate {{ end_date }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} &"
+  async: "{{ (pause_min * 60) }}"
+  poll: 0
+  tags:
+    - replay-job
+
+- name: Run Job
+  shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} --batch_id {{ batch_id }} &"
+  async: "{{ (pause_min * 60) }}"
+  poll: 0
+  tags:
+    - run-job
+
+- name: Submit jobs
+  shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ item }} --mode default --sparkMaster yarn &"
+  with_items: "{{ jobs.split(',')|list }}"
+  tags:
+    - job-submit
+
+# Cluster exhaust parallel jobs submit tasks
+
+- name: Install required python packages
+  pip:
+    name:
+      - psycopg2-binary
+      - pandas
+      - IPython
+  tags:
+    - parallel-jobs-submit
+
+- name: Copy python script file
+  template: src=update-job-requests.py.j2 dest={{ analytics_cluster.home }}/update-job-requests.py
+  delegate_to: localhost
+  tags:
+    - parallel-jobs-submit
+
+- name: Execute python script to populate batch numbers
+  shell: |
+    if echo "{{jobs}}" | grep 'druid'
+    then
+      python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{env}}_report_config
+    elif echo "{{jobs}}" | grep 'exhaust'
+    then
+      python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} exhaust {{env}}_job_request
+    fi
+  tags:
+    - parallel-jobs-submit
+  register: jobsCountStr
+
+
+- debug:
+    var: jobsCountStr
+  tags:
+    - parallel-jobs-submit
+
+- name: Write python script stdout with parallelisation value to a tmp file
+  shell: echo "{{ jobsCountStr.stdout }}" > /tmp/test.txt
+  tags:
+    - parallel-jobs-submit
+
+- name: Extract parallelisation value from tmp file
+  shell: "cat /tmp/test.txt | tr '\n' ' ' | awk -F': ' '{print $NF}'"
+  register: jobsCountOut
+  tags:
+    - parallel-jobs-submit
+
+- debug:
+    var: jobsCountOut
+  tags:
+    - parallel-jobs-submit
+
+# set jobs count variable from python script output
+- set_fact:
+    jobs_count: "{{ jobsCountOut.stdout }}"
+  tags:
+    - parallel-jobs-submit
+
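+# The task below consumes the value computed above: the python script's
+# stdout goes to a temp file, the trailing count is extracted with awk,
+# and set_fact stores it as jobs_count so it can be passed to
+# submit-script.sh as --parallelisation. An illustrative invocation
+# (assuming analytics_cluster.home is the default /tmp) would be:
+#   nohup /tmp/submit-script.sh --job progress-exhaust --mode parallel-jobs --parallelisation 4 &
+- name: Submit parallel exhaust jobs
+  shell: "nohup {{ 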
analytics_cluster.home }}/submit-script.sh --job {{ jobs }} --mode parallel-jobs --parallelisation {{ jobs_count }} &" + poll: 30 + tags: + - parallel-jobs-submit + register: submitOutput + +- debug: + var: submitOutput + tags: + - parallel-jobs-submit + +# Execute Exhaust job sanity check script tasks + +- name: Install required python packages + pip: + name: + - requests + tags: + - run-sanity + +- name: Run sanity check python script + shell: python {{ analytics.home }}/scripts/exhaust_sanity_check.py + tags: + - run-sanity + register: SanityCheckStatus \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 new file mode 100644 index 0000000000..12ebf0bde0 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 @@ -0,0 +1,86 @@ + +{% if dp_object_store_type == "azure" %} +{ + "jars": [ + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": 
"-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} +{% elif (dp_object_store_type == "s3") %} +{ + "jars": [ + "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "s3n://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", + "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "s3n://{{ bucket }}/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log 
-Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} +{% elif (dp_object_store_type == "oci") %} +{ + "jars": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + ], + "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "files": [ + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" + ], + "className": "org.ekstep.analytics.job.JobExecutor", + "executorCores": {{ spark_cluster.executor_core }}, + "executorMemory": "{{ spark_cluster.executor_memory }}", + "numExecutors": {{ spark_cluster.num_executors }}, + "conf": { + "spark.sql.autoBroadcastJoinThreshold" : "-1", + "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", + "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", + "spark.scheduler.mode" : "FAIR", + "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", + "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", + "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ 
sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + } +} +{% endif %} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 new file mode 100644 index 0000000000..e0ec7005df --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 @@ -0,0 +1,317 @@ +application.env="{{ env }}" +telemetry.version="2.1" +default.parallelization="10" +spark_output_temp_dir="/mount/data/analytics/tmp/" +lp.url="{{lp_url}}" +service.search.url="{{ service.search.url }}" +service.search.path="{{ service.search.path }}" +spark.cassandra.connection.host="{{groups['dp-cassandra'][0]}}" +cassandra.keyspace_prefix="{{ cassandra_keyspace_prefix }}" +cassandra.hierarchy_store_prefix="{{ cassandra_hierarchy_store_prefix }}" + + +storage.key.config="{{ dp_storage_key_config }}" +storage.secret.config="{{ dp_storage_secret_config }}" +reports.storage.key.config="{{ dp_reports_storage_key_config }}" +reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" +{% if dp_object_store_type == "azure" %} +cloud_storage_type="azure" +{% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3" or dp_object_store_type == "oci") %} +{% if cloud_service_provider == "oci" %} +cloud_storage_type="oci" +{% else %} +cloud_storage_type="s3" +{% endif %} +cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" +cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}" +storage.endpoint.config="{{ s3_storage_endpoint_with_protocol }}" +aws_storage_key="{{ s3_storage_key }}" +aws_storage_secret="{{ s3_storage_secret }}" +{% endif %} + +lp.contentmodel.versionkey="jd5ECm/o0BXwQCe8PfZY1NoUkB9HN41QjA80p22MKyRIcP5RW4qHw8sZztCzv87M" + +# Joblog Kafka appender config for cluster execution +log.appender.kafka.enable="false" +log.appender.kafka.broker_host="{{groups['processing-cluster-kafka'][0]}}:9092" +log.appender.kafka.topic="{{ env }}.druid.events.log" + +# Kafka connection configuration +kafka.consumer.brokerlist="{{groups['processing-cluster-kafka'][0]}}:9092" +kafka.consumer.topic="{{ env }}.analytics.job_queue" +no_of_jobs=42 + +# Spark Driver +spark.driver.memory=6g + +spark.memory_fraction={{ spark.memory_fraction }} +spark.storage_fraction={{ spark.storage_fraction }} +spark.driver_memory="{{ spark.driver_memory }}" + +#Monitor Jobs + +monitor { + notification { + webhook_url = "{{ data_exhaust_webhook_url }}" + channel = "{{ data_exhaust_Channel }}" + token = "{{ data_exhaust_token }}" + slack = true + name = "{{ data_exhaust_name }}" + } +} + +#App ID & Channel ID +default.consumption.app.id="no_value" +default.channel.id="in.ekstep" +default.creation.app.id="no_value" + + +# Media Service Type +media_service_type = "azure" + +azure_tenant="{{ media_service_azure_tenant }}" +azure_subscription_id="{{ media_service_azure_subscription_id }}" +azure_account_name="{{ media_service_azure_account_name }}" +azure_resource_group_name="{{ media_service_azure_resource_group_name }}" +azure_token_client_key="{{ media_service_azure_token_client_key }}" +azure_token_client_secret="{{ 
media_service_azure_token_client_secret }}" +elasticsearch.service.endpoint="http://{{groups['composite-search-cluster'][0]}}:9200" +elasticsearch.index.compositesearch.name="{{ es_search_index }}" + +org.search.api.url="{{ channelSearchServiceEndpoint }}" +org.search.api.key="{{ searchServiceAuthorizationToken }}" + +hierarchy.search.api.url="{{ hierarchySearchServiceUrl }}" +hierarchy.search.api.path="{{ hierarchySearchServicEndpoint }}" + +# Azure Media Service Config +azure { + location = "centralindia" + tenant = "tenant name" + subscription_id = "subscription id" + + api { + endpoint="Media Service API End Point" + version = "2018-07-01" + } + + account_name = "account name" + resource_group_name = "Resource Group Name" + + transform { + default = "media_transform_default" + hls = "media_transform_hls" + } + + stream { + base_url = "{{ stream_base_url }}" + endpoint_name = "default" + protocol = "Hls" + policy_name = "Predefined_ClearStreamingOnly" + } + + token { + client_key = "client key" + client_secret = "client secret" + } +} + +## Reports - Global config +cloud.container.reports="reports" + +# course metrics container in azure +course.metrics.cassandra.sunbirdKeyspace="sunbird" +course.metrics.cassandra.sunbirdCoursesKeyspace="sunbird_courses" +course.metrics.cassandra.sunbirdHierarchyStore="{{ cassandra_hierarchy_store_keyspace }}" +course.metrics.cloud.objectKey="" +course.metrics.cassandra.input.consistency="QUORUM" +es.host="http://{{groups['core-es'][0]}}" +es.port="9200" +es.composite.host="{{groups['composite-search-cluster'][0]}}" + +# State admin user reports +# Uses azure only - course.metrics.cloud.provider +admin.metrics.cloud.objectKey="" +admin.metrics.temp.dir="/mount/data/analytics/admin-user-reports" + +#Assessment report config +es.scroll.size = 1000 + +#BestScore or Latst Updated Score +assessment.metrics.bestscore.report=true +assessment.metrics.supported.contenttype="SelfAssess" +assessment.metrics.supported.primaryCategories="{{ assessment_metric_primary_category }}" +spark.sql.caseSensitive=true + +# content rating configurations + +druid.sql.host="http://{{druid_broker_host}}:8082/druid/v2/sql/" +druid.unique.content.query="{\"query\":\"SELECT DISTINCT \\\"object_id\\\" AS \\\"Id\\\"\\nFROM \\\"druid\\\".\\\"summary-events\\\" WHERE \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\"}" +druid.content.rating.query="{\"query\":\"SELECT \\\"object_id\\\" AS contentId, COUNT(*) AS \\\"totalRatingsCount\\\", SUM(edata_rating) AS \\\"Total Ratings\\\", SUM(edata_rating)/COUNT(*) AS \\\"averageRating\\\" FROM \\\"druid\\\".\\\"telemetry-feedback-events\\\" WHERE \\\"eid\\\" = 'FEEDBACK' AND \\\"edata_rating\\\">0 GROUP BY \\\"object_id\\\"\"}" +druid.content.consumption.query="{\"query\":\"SELECT COUNT(*) as \\\"play_sessions_count\\\", object_id as \\\"contentId\\\", SUM(total_time_spent) as \\\"total_time_spent\\\", dimensions_pdata_id, object_id\\nFROM \\\"summary-events\\\"\\nWHERE \\\"dimensions_mode\\\" = 'play' AND \\\"dimensions_type\\\" ='content' AND \\\"dimensions_pdata_pid\\\" != 'creation-portal' \\nGROUP BY object_id, dimensions_pdata_id\"}" +lp.system.update.base.url="{{lp_url}}/system/v3/content/update" + + +#Experiment Configuration + +user.search.api.url="{{sunbird_learner_service_url}}/private/user/v1/search" +user.search.limit="10000" + +# pipeline auditing +druid.pipeline_metrics.audit.query="{\"query\":\"SELECT \\\"job-name\\\", SUM(\\\"success-message-count\\\") AS \\\"success-message-count\\\", SUM(\\\"failed-message-count\\\") 
AS \\\"failed-message-count\\\", SUM(\\\"duplicate-event-count\\\") AS \\\"duplicate-event-count\\\", SUM(\\\"batch-success-count\\\") AS \\\"batch-success-count\\\", SUM(\\\"batch-error-count\\\") AS \\\"batch-error-count\\\", SUM(\\\"primary-route-success-count\\\") AS \\\"primary-route-success-count\\\", SUM(\\\"secondary-route-success-count\\\") AS \\\"secondary-route-success-count\\\" FROM \\\"druid\\\".\\\"pipeline-metrics\\\" WHERE \\\"job-name\\\" IN (%s) AND \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s' GROUP BY \\\"job-name\\\" \"}" +druid.telemetryDatasource.count.query="{ \"query\": \"SELECT COUNT(*) AS \\\"total\\\" FROM \\\"druid\\\".\\\"telemetry-events\\\" WHERE TIME_FORMAT(MILLIS_TO_TIMESTAMP(\\\"syncts\\\"), 'yyyy-MM-dd HH:mm:ss.SSS', 'Asia/Kolkata') BETWEEN TIMESTAMP '%s' AND '%s' AND \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\" }" +druid.summaryDatasource.count.query="{\"query\": \"SELECT COUNT(*) AS \\\"total\\\" FROM \\\"druid\\\".\\\"summary-events\\\" WHERE \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\" }" + +#Pipeline Audit Jobs + +pipeline_audit { + notification { + webhook_url = "{{ data_exhaust_webhook_url }}" + channel = "{{ data_exhaust_Channel }}" + token = "{{ data_exhaust_token }}" + slack = true + name = "Pipeline Audit" + } +} + +#Druid Query Processor + +druid = { + hosts = "{{druid_broker_host}}:8082" + secure = false + url = "/druid/v2/" + datasource = "telemetry-events" + response-parsing-timeout = 300000 + client-backend = "com.ing.wbaa.druid.client.DruidAdvancedHttpClient" + client-config = { + druid-advanced-http-client ={ + queue-size = 32768 + queue-overflow-strategy = "Backpressure" + query-retries = 5 + query-retry-delay = 10 ms + host-connection-pool = { + max-connections = 32 + min-connections = 0 + max-open-requests = 128 + max-connection-lifetime = 20 min + idle-timeout = 15 min + client = { + # The time after which an idle connection will be automatically closed. + # Set to `infinite` to completely disable idle timeouts. 
+ idle-timeout = 10 min + parsing.max-chunk-size = 10m + } + } + } + + } +} +druid.rollup.host="{{druid_rollup_broker_host}}" +druid.rollup.port=8082 +druid.query.wait.time.mins=10 +druid.report.upload.wait.time.mins=10 +druid.scan.batch.size=100 +druid.scan.batch.bytes=2000000 +druid.query.batch.buffer=500000 + + +// Metric event config +metric.producer.id="pipeline.monitoring" +metric.producer.pid="dataproduct.metrics" +push.metrics.kafka=true +metric.kafka.broker="{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" +metric.kafka.topic="{{ env }}.prom.monitoring.metrics" + +//Postgres Config +postgres.db="{{postgres.db_name}}" +postgres.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" +postgres.user="{{postgres.db_username}}" +postgres.pass="{{postgres.db_password}}" +postgres.program.table="program" +postgres.nomination.table="nomination" +postgres.usertable="\"V_User\"" +postgres.org.table="\"V_User_Org\"" + +druid.ingestion.path="/druid/indexer/v1/task" +druid.segment.path="/druid/coordinator/v1/metadata/datasources/" +druid.deletesegment.path="/druid/coordinator/v1/datasources/" + +postgres.druid.db="{{ druid_report_postgres_db_name }}" +postgres.druid.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" +postgres.druid.user="{{ druid_report_postgres_db_username }}" +postgres.druid.pass="{{ dp_vault_druid_postgress_pass }}" + + +location.search.url="https://{{location_search_url}}/v1/location/search" +location.search.token="{{ location_search_token }}" +location.search.request="{\"request\": {\"filters\": {\"type\" :[\"state\",\"district\"]},\"limit\" : 10000}}" + +druid.state.lookup.url = "http://{{groups['raw-coordinator'][0]}}:8081/druid/coordinator/v1/lookups/config/__default/stateSlugLookup" + +sunbird_encryption_key="{{ core_vault_sunbird_encryption_key }}" + +dcedialcode.filename="DCE_dialcode_data.csv" +etbdialcode.filename="ETB_dialcode_data.csv" +dcetextbook.filename="DCE_textbook_data.csv" +etbtextbook.filename="ETB_textbook_data.csv" +etb.dialcode.druid.length={{ etb_dialcode_list_druid_length }} + +{% if dp_object_store_type == "azure" %} +druid.report.default.storage="azure" +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} +druid.report.default.storage="s3" +{% endif %} + +druid.report.date.format="yyyy-MM-dd" +druid.report.default.container="report-verification" + +## Collection Exhaust Jobs Configuration -- Start ## + +sunbird.user.keyspace="{{ user_table_keyspace }}" +sunbird.courses.keyspace="{{ course_keyspace }}" +sunbird.content.hierarchy.keyspace="{{ cassandra_hierarchy_store_keyspace }}" +sunbird.user.cluster.host="{{ core_cassandra_host }}" +sunbird.courses.cluster.host="{{ core_cassandra_host }}" +sunbird.content.cluster.host="{{ core_cassandra_host }}" +sunbird.report.cluster.host="{{ report_cassandra_cluster_host }}" +sunbird.user.report.keyspace="{{ report_user_table_keyspace }}" +collection.exhaust.store.prefix="" +postgres.table.job_request="{{ job_request_table }}" +postgres.table.dataset_metadata="{{ dataset_metadata_table }}" + +## Collection Exhaust Jobs Configuration -- End ## + +## Exhaust throttling variables +exhaust.batches.limit.per.channel={{ exhaust_batches_limit_per_channel }} +exhaust.file.size.limit.per.channel={{ exhaust_file_size_limit_bytes_per_channel }} + +exhaust.parallel.batch.load.limit={{ exhaust_parallel_batch_load_limit }} +exhaust.user.parallelism={{ exhaust_user_parallelism }} + +data_exhaust.batch.limit.per.request={{ data_exhaust_batch_limit_per_request }} + + 
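+// The per-channel limits above throttle on-demand exhaust requests:
+// exhaust.batches.limit.per.channel caps the number of batches a channel
+// can request, and exhaust.file.size.limit.per.channel is a byte count
+// (the default 1073741824 is 1 GiB).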
+
+//START of UCI Postgres Config
+
+uci.conversation.postgres.db="{{ uci_postgres.conversation_db_name }}"
+uci.conversation.postgres.url="jdbc:postgresql://{{uci_postgres.conversation_db_host}}:{{uci_postgres.conversation_db_port}}/"
+
+uci.fushionauth.postgres.db="{{ uci_postgres.fushionauth_db_name }}"
+uci.fushionauth.postgres.url="jdbc:postgresql://{{uci_postgres.fushionauth_db_host}}:{{uci_postgres.fushionauth_db_port}}/"
+
+uci.postgres.table.conversation="{{ uci_postgres.conversation_table_name }}"
+uci.postgres.table.user="{{ uci_postgres.user_table_name }}"
+uci.postgres.table.user_registration="{{ uci_postgres.user_registration_table_name }}"
+uci.postgres.table.identities="{{ uci_postgres.user_identities_table_name }}"
+
+uci.conversation.postgres.user="{{ uci_postgres.conversation_db_user }}"
+uci.conversation.postgres.pass="{{ uci_postgres.conversation_db_psss }}"
+
+uci.fushionauth.postgres.user="{{ uci_postgres.fushionauth_db_user }}"
+uci.fushionauth.postgres.pass="{{ uci_postgres.fushionauth_db_psss }}"
+
+uci.exhaust.store.prefix=""
+uci.encryption.secret="{{ uci_encryption_secret_key }}"
+
+// END OF UCI Related Job Configs
\ No newline at end of file
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2
new file mode 100644
index 0000000000..3f6ba98d9d
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2
@@ -0,0 +1,58 @@
+import requests
+from requests.auth import HTTPBasicAuth
+import json
+
+def checkClusterStatus():
+    try:
+        res = requests.get('https://{{ spark_cluster_name }}.azurehdinsight.net/api/v1/clusters/{{ spark_cluster_name }}/alerts?format=summary', auth=HTTPBasicAuth("{{ admin_name }}", "{{ admin_password }}"))
+        if(res.status_code == 200):
+            resJson = json.loads(res.text)
+            warningCount = resJson["alerts_summary"]["WARNING"]["count"]
+            criticalCount = resJson["alerts_summary"]["CRITICAL"]["count"]
+            unknownCount = resJson["alerts_summary"]["UNKNOWN"]["count"]
+            if((warningCount + criticalCount + unknownCount) == 0):
+                print("Cluster is up & running fine. Alert counts - WARNING:{0}, CRITICAL:{1}, UNKNOWN:{2}".format(warningCount, criticalCount, unknownCount))
+                return "SUCCESS"
+            else:
+                return "FAILED. Cluster is not running properly. Found these - WARNING:{0}, CRITICAL:{1}, UNKNOWN:{2}".format(warningCount, criticalCount, unknownCount)
+        else:
+            return "FAILED. Cluster failed to provide response. Resulted in {0} response".format(res.status_code)
+    except Exception as e:
+        return "FAILED with {0}".format(str(e))
+
+def checkCassandraMigratorStatus():
+    try:
+        ## scan the joblog file for the migrator's JOB_END event
+        migratorENDEvent = ""
+        with open('{{ analytics.home }}/scripts/logs/joblog.log', 'rt') as logs:
+            for log in logs:
+                # logical `and` (not bitwise `&`): both substring checks must pass
+                if (log.count("JOB_END") == 1 and log.count("{{ cassandra_migrator_job_name }}") == 1):
+                    migratorENDEvent = log
+        logJson = json.loads(migratorENDEvent)
+        jobStatus = logJson["edata"]["status"]
+        if (jobStatus == "SUCCESS"):
+            print("Cassandra Migrator Completed successfully!")
+            return "SUCCESS"
+        else:
+            return "Cassandra Migrator failed"
+    except Exception as e:
+        return "FAILED with {0}".format(str(e))
+
+
+def main():
+    finalSuccessMessage = "All checks are successful"
+    ## check Cassandra Migrator status
+    cassandraMigratorState = checkCassandraMigratorStatus()
+    ## check spark cluster status
+    clusterState = checkClusterStatus()
+
+    if(cassandraMigratorState == "SUCCESS" and clusterState == "SUCCESS"):
+        return finalSuccessMessage
+    else:
+        raise Exception("Required checks failed. Job Status: {0} and Cluster status: {1}".format(cassandraMigratorState, clusterState))
+
+result = main()
+print(result)
\ No newline at end of file
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2
new file mode 100644
index 0000000000..c82cdd702c
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Minimal reconstruction of this logging layout: the recoverable pieces
+     are the two log-directory properties and the two bare "%m%n" pattern
+     layouts; the appender and logger names below are assumptions. -->
+<Configuration>
+    <Properties>
+        <Property name="job.log.dir">{{ analytics.home }}/scripts/logs</Property>
+        <Property name="exhaust.log.dir">{{ analytics.home }}/scripts/logs</Property>
+    </Properties>
+    <Appenders>
+        <RollingFile name="joblog" fileName="${job.log.dir}/joblog.log"
+                     filePattern="${job.log.dir}/joblog-%d{yyyy-MM-dd}.log">
+            <PatternLayout>
+                <Pattern>%m%n</Pattern>
+            </PatternLayout>
+            <Policies>
+                <TimeBasedTriggeringPolicy/>
+            </Policies>
+        </RollingFile>
+        <Console name="console" target="SYSTEM_OUT">
+            <PatternLayout>
+                <Pattern>%m%n</Pattern>
+            </PatternLayout>
+        </Console>
+    </Appenders>
+    <Loggers>
+        <Logger name="org.ekstep.analytics" level="info" additivity="false">
+            <AppenderRef ref="joblog"/>
+        </Logger>
+        <Root level="error">
+            <AppenderRef ref="console"/>
+        </Root>
+    </Loggers>
+</Configuration>
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2
new file mode 100644
index 0000000000..86f376b65d
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+
+config() {
+  bucket={{ secor_bucket }}
+  brokerList={{ brokerlist }}
+  zookeeper={{ zookeeper }}
+  brokerIngestionList={{ ingestion_kafka_brokers }}
+  job_topic={{ analytics_job_queue_topic }}
+  topic={{ topic }}
+  analyticsMetricsTopic={{ analytics_metrics_topic }}
+  sinkTopic={{ sink_topic }}
+  metricsTopic={{ metrics_topic }}
+  analytics_home={{ analytics.home }}
+  temp_folder={{ job_manager_tmp_dir }}
+  sparkCassandraConnectionHost="{{ lp_cassandra_host }}"
+  sparkRedisConnectionHost={{ metadata2_redis_host }}
+  sunbirdPlatformCassandraHost="{{ core_cassandra_host }}"
+  sunbirdPlatformElasticsearchHost="{{ sunbird_es_host }}"
+  jobManagerJobsCount="{{ analytics_jobs_count }}"
+  producerEnv="{{ producer_env }}"
+  baseScriptPath="{{ spark_output_temp_dir }}"
+  reportPostContainer="{{ reports_container }}"
+  druidIngestionURL="{{ druid_rollup_cluster_ingestion_task_url }}/druid/indexer/v1/task"
+  assessTopic={{ assess_topic }}
+
+
+  if [ -z "$2" ]; then endDate=$(date --date yesterday "+%Y-%m-%d"); else endDate=$2; fi
+  if [ ! -z "$3" ]; then inputBucket=$3; fi
+  if [ ! -z "$4" ]; then sinkTopic=$4; fi
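+  # Positional args, as used below: $1 is the job id, $2 doubles as
+  # endDate or (for program reports) keyword, $3 optionally overrides the
+  # input bucket and $4 the sink topic.
+  if [ ! 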
-z "$2" ]; then keyword=$2; fi + case "$1" in + "assessment-correction") + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' + ;; + "assessment-archival") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"azure","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' + ;; + "assessment-archived-removal") +{% if dp_object_store_type == "azure" %} + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + ;; +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + ;; +{% endif %} + "collection-reconciliation-job") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CollectionReconciliationJob","modelParams":{"mode":"prodrun","brokerList":"{{ingestion_kafka_broker_host}}","topic":"{{env}}.issue.certificate.request","sparkCassandraConnectionHost":"{{ core_cassandra_host }}"},"parallelization":30,"appName":"CollectionReconciliationJob"}' + ;; + "collection-summary-report") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host 
}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + ;; + "score-metric-migration-job") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.ScoreMetricMigrationJob","modelParams":{"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Score Metric Migration Job"}' + ;; + "assessment-score-metric-correction") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.AssessmentScoreCorrectionJob","modelParams":{"assessment.score.correction.batches":"","cassandraReadConsistency":"QUORUM","cassandraWriteConsistency":"QUORUM","csvPath":"/mount/data/analytics/score_correction","isDryRunMode":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":30,"appName":"Assessment Score Correction Job"}' + ;; + "course-batch-status-updater") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' + ;; + "collection-summary-report-v2") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' + ;; + "uci-private-exhaust") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' + ;; + "uci-response-exhaust") + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI 
Response Exhaust"}' + ;; + "userinfo-exhaust") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + ;; + "program-collection-summary-report") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + ;; + "response-exhaust") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' + ;; + "response-exhaust-v2") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' + ;; + "progress-exhaust") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + ;; + 
"progress-exhaust-v2") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' + ;; + "druid_reports") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.DruidQueryProcessingModel","modelParams":{"mode":"batch"},"parallelization":8,"appName":"Druid Reports"}' + ;; + "cassandra-migration") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.updater.CassandraMigratorJob","modelParams":{"cassandraDataHost":"{{ core_cassandra_host }}","cassandraMigrateHost":"{{ report_cassandra_host }}","keyspace":"sunbird_courses","cassandraDataTable":"user_enrolments","cassandraMigrateTable":"{{ report_user_enrolment_table }}","repartitionColumns":"batchid"},"parallelization":10,"appName":"Cassandra Migrator","deviceMapping":false}' + ;; + "monitor-job-summ") + echo '{"search":{"type":"local","queries":[{"file":"'$analytics_home'/scripts/logs/joblog.log"}]},"model":"org.ekstep.analytics.model.MonitorSummaryModel","modelParams":{"pushMetrics":true,"brokerList":"'$brokerList'","topic":"'$analyticsMetricsTopic'","model":[{"model":"WorkFlowSummaryModel","category":"consumption","input_dependency":"None"},{"model":"UpdateContentRating","category":"consumption","input_dependency":"None"},{"model":"DruidQueryProcessingModel","category":"consumption","input_dependency":"None"},{"model":"MetricsAuditJob","category":"consumption","input_dependency":"None"},{"model":"StateAdminReportJob","category":"consumption","input_dependency":"None"},{"model":"StateAdminGeoReportJob","category":"consumption","input_dependency":"None"},{"model":"CourseEnrollmentJob","category":"consumption","input_dependency":"None"}]},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"appName":"TestMonitorSummarizer","deviceMapping":true}' + ;; + "job-manager") + echo '{"jobsCount":'$jobManagerJobsCount',"topic":"'$job_topic'","bootStrapServer":"'$brokerList'","zookeeperConnect":"'$zookeeper'","consumerGroup":"jobmanager","slackChannel":"#test_channel","slackUserName":"JobManager","tempBucket":"'$bucket'","tempFolder":"'$temp_folder'"}' + ;; + "wfs") + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"} }],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' + #echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": 
false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' + ;; + "video-streaming") + echo '{"search":{"type":"{{ dp_object_store_type }}"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' + ;; + "admin-user-reports") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' + ;; + "admin-geo-reports") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' + ;; + "telemetry-replay") + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' + ;; + "summary-replay") + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' + ;; + "content-rating-updater") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.updater.UpdateContentRating","modelParams": {"startDate": "'$endDate'","endDate": "'$endDate'"},"output": [{"to":"console","params":{"printEvent":false}}],"parallelization": 8,"appName": "Content Rating Updater","deviceMapping": false}' + ;; + "experiment") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.ExperimentDefinitionModel","modelParams":{"sparkElasticsearchConnectionHost":"{{ lp_composite_search_host }}"},"output":[{"to":"elasticsearch","params":{"index":"experiment"}}],"parallelization":8,"appName":"Experiment-Definition","deviceMapping":false}' + ;; + "etb-metrics") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked 
content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' + ;; + "course-enrollment-report") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' + ;; + "course-consumption-report") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": "sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "{{ dp_object_store_type 
}}","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' + ;; + "textbook-progress-report") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' + ;; + "audit-metrics-report") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"{{ dp_object_store_type 
}}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' + ;; + "sourcing-metrics") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + ;; + "druid-dataset") + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' + ;; + "*") + echo "Unknown model code" + exit 1 # Command to come out of the program with status 1 + ;; + esac +} diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 new file mode 100644 index 0000000000..a3569c7f46 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 @@ -0,0 +1,670 @@ +{ + "wfs": { + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "unique/raw/", + "endDate": "$(date --date yesterday '+%Y-%m-%d')", + "delta": 0 + } + ] + }, + "filters": [ + { + "name": "actor", + "operator": "ISNOTNULL" + } + ], + "model": "org.ekstep.analytics.model.WorkflowSummary", + "modelParams": { + "apiVersion": "v2", + "parallelization": 32 + }, + "output": [ + { + "to": "{{dp_object_store_type}}", + "params": { + "bucket": "{{ bucket }}", + "key": "{{ job_manager_tmp_dir }}/wfs/$(date --date yesterday '+%Y-%m-%d')" + } + }, + { + "to": "kafka", + "params": { + "brokerList": "{{ brokerlist }}", + "topic": "{{ topic }}" + } + } + ], + "parallelization": 32, + "appName": "Workflow Summarizer", + "deviceMapping": true + }, + "video-streaming": { + "search": { + "type": "{{dp_object_store_type}}" + }, + "model": "org.ekstep.analytics.job.VideoStreamingJob", + "modelParams": { + "maxIterations": 10 + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + } + ], + "parallelization": 8, + "appName": "Video Streaming Job", + "deviceMapping": false + }, + "admin-user-reports": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.job.report.StateAdminReportJob", + "modelParams": { + "sparkCassandraConnectionHost": "{{core_cassandra_host}}", + 
"sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + } + ], + "parallelization": 8, + "appName": "Admin User Reports", + "deviceMapping": false + }, + "admin-geo-reports": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.job.report.StateAdminGeoReportJob", + "modelParams": { + "sparkCassandraConnectionHost": "{{core_cassandra_host}}", + "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + } + ], + "parallelization": 8, + "appName": "Admin Geo Reports", + "deviceMapping": false + }, + "content-rating-updater": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.updater.UpdateContentRating", + "modelParams": { + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date '+%Y-%m-%d')" + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + } + ], + "parallelization": 8, + "appName": "Content Rating Updater", + "deviceMapping": false + }, + "monitor-job-summ": { + "search": { + "type": "local", + "queries": [ + { + "file": "{{ analytics.home }}/scripts/logs/joblog.log" + } + ] + }, + "model": "org.ekstep.analytics.model.MonitorSummaryModel", + "modelParams": { + "pushMetrics": true, + "brokerList": "{{ brokerlist }}", + "topic": "{{ analytics_metrics_topic }}", + "model": [ + { + "model": "WorkFlowSummaryModel", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "UpdateContentRating", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "DruidQueryProcessingModel", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "MetricsAuditJob", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "StateAdminReportJob", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "StateAdminGeoReportJob", + "category": "consumption", + "input_dependency": "None" + }, + { + "model": "CourseEnrollmentJob", + "category": "consumption", + "input_dependency": "None" + } + ] + }, + "output": [ + { + "to": "console", + "params": { + "printEvent": false + } + }, + { + "to": "kafka", + "params": { + "brokerList": "{{ brokerlist }}", + "topic": "{{ topic }}" + } + } + ], + "appName": "TestMonitorSummarizer", + "deviceMapping": true + }, + "experiment": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.ExperimentDefinitionModel", + "modelParams": { + "sparkElasticsearchConnectionHost": "{{ lp_composite_search_host }}" + }, + "output": [ + { + "to": "elasticsearch", + "params": { + "index": "experiment" + } + } + ], + "parallelization": 8, + "appName": "Experiment-Definition", + "deviceMapping": false + }, + "etb-metrics": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.report.ETBMetricsJob", + "modelParams": { + "reportConfig": { + "id": "etb_metrics", + "metrics": [], + "labels": { + "date": "Date", + "identifier": "TextBook ID", + "name": "TextBook Name", + "medium": "Medium", + "gradeLevel": "Grade", + "subject": "Subject", + "createdOn": "Created On", + "lastUpdatedOn": "Last Updated On", + "totalQRCodes": "Total number of QR codes", + "contentLinkedQR": "Number of QR codes with atleast 1 linked content", + "withoutContentQR": "Number of QR codes with no linked content", + "withoutContentT1": "Term 1 QR Codes with no linked content", + "withoutContentT2": "Term 2 QR Codes with no 
linked content", + "status": "Status", + "totalContentLinked": "Total content linked", + "totalQRLinked": "Total QR codes linked to content", + "totalQRNotLinked": "Total number of QR codes with no linked content", + "leafNodesCount": "Total number of leaf nodes", + "leafNodeUnlinked": "Number of leaf nodes with no content", + "l1Name": "Level 1 Name", + "l2Name": "Level 2 Name", + "l3Name": "Level 3 Name", + "l4Name": "Level 4 Name", + "l5Name": "Level 5 Name", + "dialcode": "QR Code", + "sum(scans)": "Total Scans", + "noOfContent": "Number of contents", + "nodeType": "Type of Node", + "term": "Term" + }, + "output": [{ + "type": "csv", + "dims": ["identifier", "channel", "name"], + "fileParameters": ["id", "dims"] + }], + "mergeConfig": { + "frequency": "WEEK", + "basePath": "{{ spark_output_temp_dir }}", + "rollup": 0, + "reportPath": "dialcode_counts.csv", + "postContainer":"{{ reports_container }}" + } + }, + "dialcodeReportConfig": { + "id": "etb_metrics", + "metrics": [], + "labels": {}, + "output": [{ + "type": "csv", + "dims": ["identifier", "channel", "name"], + "fileParameters": ["id", "dims"] + }], + "mergeConfig": { + "frequency": "WEEK", + "basePath": "{{ spark_output_temp_dir }}", + "rollup": 1, + "reportPath": "dialcode_counts.csv", + "rollupAge": "ACADEMIC_YEAR", + "rollupCol": "Date", + "rollupRange": 10, + "postContainer":"{{ reports_container }}" + } + }, + "etbFileConfig": { + "bucket": "{{ reports_container }}", + "file": "dialcode_scans/dialcode_counts.csv" + }, + "druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"},{"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName": "name","aliasName": "name"},{"fieldName": "createdFor","aliasName": "createdFor"},{"fieldName": "createdOn","aliasName": "createdOn"},{"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"},{"fieldName": "board","aliasName": "board"},{"fieldName": "medium","aliasName": "medium"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"},{"fieldName": "subject","aliasName": "subject"},{"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"},{"type": "in","dimension": "status","values": ["Live","Draft","Review"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}}, + "tenantConfig": { + "tenantId": "", + "slugName": "" + }, + "store": "{{dp_object_store_type}}", + "format": "csv", + "key": "druid-reports/", + "filePath": "druid-reports/", + "container": "{{ bucket }}", + "folderPrefix": ["slug", "reportName"] + }, + "output": [{ + "to": "console", + "params": { + "printEvent": false + } + }], + "parallelization": 8, + "appName": "ETB Metrics Model", + "deviceMapping": false + }, + "course-enrollment-report":{ + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.report.CourseEnrollmentJob", + "modelParams": { + "reportConfig": { + "id": "tpd_metrics", + "metrics" : [], + "labels": { + "completionCount": "Completion Count", + "status": "Status", + "enrollmentCount": "Enrollment Count", + "courseName": "Course Name", + "batchName": "Batch Name" + }, + "output": [{ + "type": "csv", + "dims": [] + }] + }, + "esConfig": { + "request": { + "filters":{ + "objectType": ["Content"], + "contentType": ["Course"], + "identifier": [], + "status": ["Live"] + }, + "limit": 10000 + } + }, + "store": "{{dp_object_store_type}}", + "format":"csv", + "key": "druid-reports/", + "filePath": "druid-reports/", + "container": "{{ bucket }}", + "folderPrefix": ["slug", "reportName"], + "sparkCassandraConnectionHost":"{{core_cassandra_host}}", + "sparkElasticsearchConnectionHost":"{{sunbird_es_host}}" + }, + "output": [{ + "to": "console", + "params": { + "printEvent": false + } + }], + "parallelization": 8, + "appName": "TPD Course Enrollment Metrics Model", + "deviceMapping": false + }, + "course-consumption-report":{ + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.report.CourseConsumptionJob", + "modelParams": { + "esConfig": { + "request": { + "filters": { + "objectType": ["Content"], + "contentType": ["Course"], + "identifier": [], + "status": ["Live"] + } + } + }, + "reportConfig": { + "id": "tpd_metrics", + "labels": { + "date": "Date", + "status": "Batch Status", + "timespent": "Timespent in mins", + "courseName": "Course Name", + "batchName": "Batch Name" + }, + "dateRange": { + "staticInterval": "LastDay", + "granularity": "all" + }, + "metrics": [{ + "metric": "totalCoursePlays", + "label": "Total Course Plays (in mins) ", + "druidQuery": { + "queryType": "groupBy", + "dataSource": "summary-events", + "intervals": "LastDay", + "aggregations": [{ + "name": "sum__edata_time_spent", + "type": "doubleSum", + "fieldName": "edata_time_spent" + }], + "dimensions": [{ + "fieldName": "object_rollup_l1", + "aliasName": "courseId" + }, { + "fieldName": "uid", + "aliasName": "userId" + }, { + "fieldName": "context_cdata_id", + "aliasName": "batchId" + }], + "filters": [{ + "type": "equals", + "dimension": "eid", + "value": "ME_WORKFLOW_SUMMARY" + }, { + 
"type": "in", + "dimension": "dimensions_pdata_id", + "values": ["{{ producer_env }}.app", "{{ producer_env }}.portal"] + }, { + "type": "equals", + "dimension": "dimensions_type", + "value": "content" + }, { + "type": "equals", + "dimension": "dimensions_mode", + "value": "play" + }, { + "type": "equals", + "dimension": "context_cdata_type", + "value": "batch" + }], + "postAggregation": [{ + "type": "arithmetic", + "name": "timespent", + "fields": { + "leftField": "sum__edata_time_spent", + "rightField": 60, + "rightFieldType": "constant" + }, + "fn": "/" + }], + "descending": "false" + } + }], + "output": [{ + "type": "csv", + "metrics": ["timespent"], + "dims": [] + }], + "queryType": "groupBy" + }, + "store": "{{dp_object_store_type}}", + "format":"csv", + "key": "druid-reports/", + "filePath": "druid-reports/", + "container": "{{ bucket }}", + "folderPrefix": ["slug", "reportName"], + "sparkCassandraConnectionHost":"{{core_cassandra_host}}", + "sparkElasticsearchConnectionHost":"{{sunbird_es_host}}" + }, + "output": [{ + "to": "console", + "params": { + "printEvent": false + } + }], + "parallelization": 8, + "appName": "TPD Course Consumption Metrics Model", + "deviceMapping": false + }, + "audit-metrics-report": { + "search": { + "type": "none" + }, + "model": "org.ekstep.analytics.model.MetricsAuditJob", + "modelParams": { + "auditConfig": [ + { + "name": "denorm", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "telemetry-denormalized/raw/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + }, + "filters": [ + { + "name": "flags.user_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.content_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.device_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.dialcode_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.collection_data_retrieved", + "operator": "EQ", + "value": true + }, + { + "name": "flags.derived_location_retrieved", + "operator": "EQ", + "value": true + } + ] + }, + { + "name": "failed", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "failed/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + } + }, + { + "name": "unique", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "unique/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + } + }, + { + "name": "raw", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "raw/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + } + }, + { + "name": "channel-raw", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "folder": true, + "bucket": "{{ bucket }}", + "prefix": "channel/*/raw/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')*.json.gz" + } + ] + } + }, + { + "name": "channel-summary", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "folder": true, + "bucket": "{{ bucket }}", + "prefix": "channel/*/summary/", + "startDate": "$(date --date yesterday 
'+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')*.json.gz" + } + ] + } + }, + { + "name": "derived", + "search": { + "type": "{{dp_object_store_type}}", + "queries": [ + { + "bucket": "{{ bucket }}", + "prefix": "derived/wfs/", + "startDate": "$(date --date yesterday '+%Y-%m-%d')", + "endDate": "$(date --date yesterday '+%Y-%m-%d')" + } + ] + } + }, + { + "name": "telemetry-count", + "search": { + "type": "druid", + "druidQuery": { + "queryType": "timeSeries", + "dataSource": "telemetry-events", + "intervals": "LastDay", + "aggregations": [ + { + "name": "total_count", + "type": "count", + "fieldName": "count" + } + ], + "descending": "false" + } + } + }, + { + "name": "summary-count", + "search": { + "type": "druid", + "druidQuery": { + "queryType": "timeSeries", + "dataSource": "summary-events", + "intervals": "LastDay", + "aggregations": [ + { + "name": "total_count", + "type": "count", + "fieldName": "count" + } + ], + "descending": "false" + } + } + } + ] + }, + "output": [ + { + "to": "kafka", + "params": { + "brokerList": "{{ brokerlist }}", + "topic": "{{ metrics_topic }}" + } + } + ], + "parallelization": 8, + "appName": "Metrics Audit" + } +} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 new file mode 100644 index 0000000000..f720f4687e --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +config() { + bucket={{ bucket }} + brokerList={{ brokerlist }} + zookeeper={{ zookeeper }} + job_topic={{ analytics_job_queue_topic }} + topic={{ topic }} + sparkCassandraConnectionHost="{{ lp_cassandra_host }}" + sunbirdPlatformCassandraHost="{{ core_cassandra_host }}" + reportPostContainer="{{ reports_container }}" + druidRollupHost="{{ druid_rollup_cluster_ingestion_task_url }}" + + if [ -z "$2" ]; then endDate=$(date --date yesterday "+%Y-%m-%d"); else endDate=$2; fi + if [ ! 
-z "$3" ]; then inputBucket=$3; fi + case "$1" in + "content-details") + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","aliasName":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question 
Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{dp_object_store_type}}","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' + ;; + "sourcing-summary-report") + echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": "'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": "/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "{{dp_object_store_type}}", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' + ;; + "funnel-report") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": 
"programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. of contributions pending review","approvedContributions": "No. of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "{{dp_object_store_type}}","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": "equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' + ;; + "sourcing-metrics") + echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{dp_object_store_type}}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}'
+ ;;
+ "*")
+ echo "Unknown model code"
+ exit 1 # Exit the program with status 1
+ ;;
+ esac
+}
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2
new file mode 100644
index 0000000000..3a6c969b7b
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7
+export MODELS_HOME={{ analytics.home }}/models-{{ model_version }}
+export DP_LOGS={{ analytics.home }}/logs/data-products
+
+cd {{ analytics.home }}/scripts
+source model-config.sh
+source replay-utils.sh
+
+libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0"
+
+if [ "$1" == "telemetry-replay" ]
+ then
+ if [ ! $# -eq 5 ]
+ then
+ echo "Not sufficient arguments. Killing process"
+ exit 1
+ fi
+fi
+
+get_report_job_model_name(){
+ case "$1" in
+ "assessment-correction") echo 'org.sunbird.analytics.job.report.AssessmentCorrectionJob'
+ ;;
+ *) echo $1
+ ;;
+ esac
+}
+
+if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi
+if [ -z "$job_config" ]; then job_config=$(config $1 '__endDate__' $4 $5); fi
+start_date=$2
+end_date=$3
+backup_key=$1
+
+if [ "$1" == "gls-v1" ]
+ then
+ backup_key="gls"
+elif [ "$1" == "app-ss-v1" ]
+ then
+ backup_key="app-ss"
+fi
+
+backup $start_date $end_date {{ bucket }} "derived/$backup_key" "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log"
+if [ $? == 0 ]
+ then
+ echo "Backup completed successfully..." >> "$DP_LOGS/$end_date-$1-replay.log"
+ echo "Running the $1 job replay..." >> "$DP_LOGS/$end_date-$1-replay.log"
+ echo "Job modelName - $job_id" >> "$DP_LOGS/$end_date-$1-replay.log"
+ $SPARK_HOME/bin/spark-submit --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar --class org.ekstep.analytics.job.ReplaySupervisor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --fromDate "$start_date" --toDate "$end_date" --config "$job_config" >> "$DP_LOGS/$end_date-$1-replay.log"
+else
+ echo "Unable to take backup" >> "$DP_LOGS/$end_date-$1-replay.log"
+ exit 1 # Abort here so the status check below always reflects spark-submit, not the echo above
+fi
+
+if [ $? == 0 ]
+ then
+ echo "$1 replay executed successfully" >> "$DP_LOGS/$end_date-$1-replay.log"
+ delete {{ bucket }} "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log"
+else
+ echo "$1 replay failed" >> "$DP_LOGS/$end_date-$1-replay.log"
+ rollback {{ bucket }} "derived/$backup_key" "backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log"
+ delete {{ bucket }} "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log"
+fi
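For orientation, the replay wrapper above takes a model code plus a from/to date, and telemetry-replay additionally requires five arguments in total; the fourth and fifth are forwarded to config() as $4 and $5. A minimal invocation sketch, assuming the template is deployed as replay-job.sh under {{ analytics.home }}/scripts and using illustrative dates, prefix, and topic (the prefix/topic meaning of $4/$5 is inferred from the telemetry-replay config and is an assumption here):

    # Replay workflow summaries over a three-day window ($2 = from date, $3 = to date).
    ./replay-job.sh wfs 2022-12-01 2022-12-03
    # telemetry-replay insists on exactly five arguments; the input prefix and
    # sink topic (assumed here) follow the date range.
    ./replay-job.sh telemetry-replay 2022-12-01 2022-12-03 raw/ telemetry.raw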
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2
new file mode 100644
index 0000000000..580c3bf29c
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7
+export MODELS_HOME={{ analytics.home }}/models-{{ model_version }}
+export DP_LOGS={{ analytics.home }}/logs/data-products
+
+cd {{ analytics.home }}/scripts
+source model-config.sh
+source replay-utils.sh
+
+job_config=$(config $1 '__endDate__')
+start_date=$2
+end_date=$3
+
+echo "Running the $1 updater replay..." >> "$DP_LOGS/$end_date-$1-replay.log"
+$SPARK_HOME/bin/spark-submit --master local[*] --jars $MODELS_HOME/analytics-framework-2.0.jar --class org.ekstep.analytics.job.ReplaySupervisor $MODELS_HOME/batch-models-2.0.jar --model "$1" --fromDate "$start_date" --toDate "$end_date" --config "$job_config" >> "$DP_LOGS/$end_date-$1-replay.log"
+
+if [ $? == 0 ]
+ then
+ echo "$1 updater replay executed successfully..." >> "$DP_LOGS/$end_date-$1-replay.log"
+else
+ echo "$1 updater replay failed" >> "$DP_LOGS/$end_date-$1-replay.log"
+ exit 1
+fi
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2
new file mode 100644
index 0000000000..31ead572f3
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+rollback() {
+ bucket_name=$1
+ prefix=$2
+ backup_dir=$3
+
+ src="s3://$bucket_name/$prefix/"
+ dst="s3://$bucket_name/$backup_dir/"
+ echo "Copy back the $prefix files to source directory $src from backup directory $dst"
+ aws s3 cp $dst $src --recursive --include "*" --region ap-south-1
+}
+
+delete() {
+ bucket_name=$1
+ backup_dir=$2
+
+ path="s3://$bucket_name/$backup_dir/"
+ echo "Deleting the back-up files from $path"
+ aws s3 rm $path --recursive --region ap-south-1
+}
+
+backup() {
+ dt_start=$1
+ dt_end=$2
+ prefix=$4
+ bucket_name=$3
+ backup_dir=$5
+
+ ts_start=$(date -d $dt_start +%s)
+ ts_end=$(date -d $dt_end +%s)
+ src="s3://$bucket_name/$prefix/"
+ dst="s3://$bucket_name/$backup_dir/"
+
+ echo "Backing up the files from $src to $dst for the date range - ($dt_start, $dt_end)"
+ while [ $ts_start -le $ts_end ]
+ do
+ date=`date -d @$ts_start +%F`
+ aws s3 mv $src $dst --recursive --exclude "*" --include "$date-*" --region ap-south-1
+ let ts_start+=86400
+ done
+}
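The three helpers above implement a move-aside/restore protocol around a replay: backup relocates day-partitioned files with aws s3 mv (a move, not a copy), delete discards the backup after a successful run, and rollback copies the files back after a failure. A sketch of that lifecycle under assumed values (the bucket name is hypothetical; the prefixes mirror how replay-job.j2 calls these functions):

    source replay-utils.sh
    # Move the matching day-files aside before replaying; this is a move, so the
    # source prefix is emptied for those dates until delete or rollback runs.
    backup "2022-12-01" "2022-12-03" "my-analytics-bucket" "derived/wfs" "derived/backup-wfs"
    # On a successful replay, drop the backup copy...
    delete "my-analytics-bucket" "derived/backup-wfs"
    # ...on failure, copy the moved files back into the source prefix instead.
    rollback "my-analytics-bucket" "derived/wfs" "derived/backup-wfs"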
analytics.home }}/logs/data-products +## Job to run daily +cd {{ analytics.home }}/scripts +source model-dock-config.sh +today=$(date "+%Y-%m-%d") + +libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" +file_path="dock-{{ env }}.conf" + +get_report_job_model_name(){ + case "$1" in + "funnel-report") echo 'org.sunbird.analytics.sourcing.FunnelReport' + ;; + "sourcing-summary-report") echo 'org.sunbird.analytics.sourcing.SourcingSummaryReport' + ;; + "sourcing-metrics") echo 'org.sunbird.analytics.sourcing.SourcingMetrics' + ;; + "content-details") echo 'org.sunbird.analytics.sourcing.ContentDetailsReport' + ;; + *) echo $1 + ;; + esac +} + +if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi + +if [ ! -z "$1" ]; then job_config=$(config $1); else job_config="$2"; fi + +if [ ! -z "$2" ]; then batchIds=";$2"; else batchIds=""; fi + +echo "Starting the job - $1" >> "$DP_LOGS/$today-job-execution.log" + +echo "Job modelName - $job_id" >> "$DP_LOGS/$today-job-execution.log" + +nohup $SPARK_HOME/bin/spark-submit --conf spark.driver.extraJavaOptions="-Dconfig.file=$MODELS_HOME/$file_path" --conf spark.executor.extraJavaOptions="-Dconfig.file=$MODELS_HOME/$file_path" --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar,$MODELS_HOME/batch-models-2.0.jar --class org.ekstep.analytics.job.JobExecutor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --config "$job_config$batchIds" \ >> "$DP_LOGS/$today-job-execution.log" 2>&1 + +echo "Job execution completed - $1" >> "$DP_LOGS/$today-job-execution.log" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 new file mode 100644 index 0000000000..26ec84da87 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 +export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} +export DP_LOGS={{ analytics.home }}/logs/data-products +## Job to run daily +cd {{ analytics.home }}/scripts +source model-config.sh +today=$(date "+%Y-%m-%d") + +libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" + +get_report_job_model_name(){ + case "$1" in + "course-enrollment-report") echo 'org.sunbird.analytics.job.report.CourseEnrollmentJob' + ;; + "course-consumption-report") echo 'org.sunbird.analytics.job.report.CourseConsumptionJob' + ;; + "funnel-report") echo 'org.sunbird.analytics.sourcing.FunnelReport' + ;; + "sourcing-metrics") echo 'org.sunbird.analytics.sourcing.SourcingMetrics' + ;; + "admin-geo-reports") echo 'org.sunbird.analytics.job.report.StateAdminGeoReportJob' + ;; + "etb-metrics") echo 'org.sunbird.analytics.job.report.ETBMetricsJob' + ;; + "admin-user-reports") echo 'org.sunbird.analytics.job.report.StateAdminReportJob' + ;; + "userinfo-exhaust") echo 'org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob' + ;; + "response-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJob' + ;; + "response-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2' + ;; + "progress-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJob' + ;; + "progress-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2' + ;; + "cassandra-migration") echo 
'org.sunbird.analytics.updater.CassandraMigratorJob' + ;; + "collection-summary-report") echo 'org.sunbird.analytics.job.report.CollectionSummaryJob' + ;; + "program-collection-summary-report") echo 'org.sunbird.analytics.job.report.CollectionSummaryJob' + ;; + "collection-summary-report-v2") echo 'org.sunbird.analytics.job.report.CollectionSummaryJobV2' + ;; + "assessment-score-metric-correction") echo 'org.sunbird.analytics.audit.AssessmentScoreCorrectionJob' + ;; + "course-batch-status-updater") echo 'org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob' + ;; + "collection-reconciliation-job") echo 'org.sunbird.analytics.audit.CollectionReconciliationJob' + ;; + "assessment-correction") echo 'org.sunbird.analytics.job.report.AssessmentCorrectionJob' + ;; + "score-metric-migration-job") echo 'org.sunbird.analytics.audit.ScoreMetricMigrationJob' + ;; + "assessment-archival") echo 'org.sunbird.analytics.job.report.AssessmentArchivalJob' + ;; + "assessment-archived-removal") echo 'org.sunbird.analytics.job.report.AssessmentArchivalJob' + ;; + "uci-private-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob' + ;; + "uci-response-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIResponseExhaustJob' + ;; + *) echo $1 + ;; + esac +} + +if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi + +if [ ! -z "$1" ]; then job_config=$(config $1 $2); else job_config="$2"; fi + +if [ ! -z "$2" ]; then batchIds=";$2"; else batchIds=""; fi + + +echo "Starting the job - $1" >> "$DP_LOGS/$today-job-execution.log" + +echo "Job modelName - $job_id" >> "$DP_LOGS/$today-job-execution.log" + +nohup $SPARK_HOME/bin/spark-submit --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar,$MODELS_HOME/batch-models-2.0.jar --class org.ekstep.analytics.job.JobExecutor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --config "$job_config$batchIds" >> "$DP_LOGS/$today-job-execution.log" 2>&1 + +echo "Job execution completed - $1" >> "$DP_LOGS/$today-job-execution.log" diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 new file mode 100644 index 0000000000..2e613b9866 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version}}-bin-hadoop2.7 +export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} +export DP_LOGS={{ analytics.home }}/logs/data-products +export SERVICE_LOGS={{ analytics.home }}/logs/services +export JM_HOME={{ analytics.home }}/job-manager + +export azure_storage_key={{sunbird_private_storage_account_name}} +export azure_storage_secret={{sunbird_private_storage_account_key}} +export reports_azure_storage_key={{sunbird_private_storage_account_name}} +export reports_azure_storage_secret={{sunbird_private_storage_account_key}} +export druid_storage_account_key={{sunbird_public_storage_account_name}} +export druid_storage_account_secret={{sunbird_public_storage_account_key}} + +export heap_conf_str={{ spark.heap_conf_str }} +today=$(date "+%Y-%m-%d") + +kill_job_manager() +{ + echo "Killing currently running job-manager process" >> "$SERVICE_LOGS/$today-job-manager.log" + kill $(ps aux | grep 'JobManager' | awk '{print $2}') >> "$SERVICE_LOGS/$today-job-manager.log" +} + +start_job_manager() +{ + kill_job_manager # Before starting the 
job, we kill any job-manager that is already running
+    cd {{ analytics.home }}/scripts
+    source model-config.sh
+    job_config=$(config 'job-manager')
+    echo "Starting the job manager" >> "$SERVICE_LOGS/$today-job-manager.log"
+    echo "config: $job_config" >> "$SERVICE_LOGS/$today-job-manager.log"
+    nohup java $heap_conf_str -cp "$SPARK_HOME/jars/*:$MODELS_HOME/*:$MODELS_HOME/data-products-1.0/lib/*" -Dconfig.file=$MODELS_HOME/{{ env }}.conf org.ekstep.analytics.job.JobManager --config "$job_config" >> $SERVICE_LOGS/$today-job-manager.log 2>&1 &
+
+    job_manager_pid=$(ps aux | grep 'JobManager' | awk '{print $2}') # Once the job manager is started, capture its PID to check whether it is actually running.
+    if [[ ! -z "$job_manager_pid" ]]; then
+        echo "Job manager is started." >> "$SERVICE_LOGS/$today-job-manager.log"
+    else
+        echo "Job manager is not started." >> "$SERVICE_LOGS/$today-job-manager.log"
+    fi
+}
+# Tasks
+# Kill the job-manager
+# Start the job-manager
+# Make sure it is running
+start_job_manager
+
diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2
new file mode 100644
index 0000000000..53c032cd29
--- /dev/null
+++ b/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2
@@ -0,0 +1,58 @@
+require "ruby-kafka"
+require 'json'
+
+@log = File.open("{{ analytics.home }}/logs/logfile.log", 'a')
+@kafka = Kafka.new(["{{ kafka_broker_host }}"])
+@topic = "{{ analytics_job_queue_topic }}"
+@report_list_jobs_url = "{{ report_list_jobs_url }}"
+@submit_jobs_auth_token = "{{ submit_jobs_auth_token }}"
+@submit_jobs_command = "source /mount/venv/bin/activate && dataproducts submit_druid_jobs --report_list_jobs_url #{@report_list_jobs_url} --auth_token #{@submit_jobs_auth_token}"
+
+def log(message)
+  @log.write("#{Time.now.to_s}: #{message}\n")
+end
+
+def submit_all_jobs
+  report_jobs = {
+    "assessment-dashboard-metrics" => "org.sunbird.analytics.job.report.AssessmentMetricsJobV2",
+    "course-dashboard-metrics" => "org.sunbird.analytics.job.report.CourseMetricsJobV2",
+    "course-enrollment-report" => "org.sunbird.analytics.job.report.CourseEnrollmentJob",
+    "course-consumption-report" => "org.sunbird.analytics.job.report.CourseConsumptionJob",
+    "etb-metrics" => "org.sunbird.analytics.job.report.ETBMetricsJob",
+    "admin-geo-reports" => "org.sunbird.analytics.job.report.StateAdminGeoReportJob",
+    "admin-user-reports" => "org.sunbird.analytics.job.report.StateAdminReportJob"
+  }
+  jobs = [{{ analytics_job_list }}]
+
+  log("Starting to submit #{jobs.count} jobs for processing")
+  file = File.read("{{ analytics.home }}/scripts/model-config.json")
+  file = file.gsub("$(date --date yesterday '+%Y-%m-%d')", `date --date yesterday '+%Y-%m-%d'`.strip)
+  file = file.gsub("$(date '+%Y-%m-%d')", `date "+%Y-%m-%d"`.strip)
+  config_hash = JSON.parse(file)
+  log("Config file loaded")
+  jobs.each do |job|
+    if job == "monitor-job-summ"
+      log("python")
+      system('/bin/bash -l -c "'+ @submit_jobs_command +'"')
+      submit_job(job, config_hash[job])
+    elsif report_jobs[job].nil? 
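+      # Illustrative only, not in the original script: jobs with a mapping in
+      # report_jobs are submitted under their Scala class name, anything else
+      # under its raw job id, e.g.
+      #   submit_job("wfs", config_hash["wfs"])   # "wfs" is a hypothetical raw id
+      #   submit_job(report_jobs["etb-metrics"], config_hash["etb-metrics"])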
+ submit_job(job, config_hash[job]) + else + submit_job(report_jobs[job], config_hash[job]) + end + + log("Submitted #{jobs.count} jobs for processing") + end +end + +def submit_job(job, config) + job_config = {model: job, config: config}.to_json + log("message: #{job_config}") + @kafka.deliver_message(job_config, topic: @topic) + log("Submitted #{job} for processing") +end + + + + +submit_all_jobs diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 new file mode 100644 index 0000000000..859cf602c3 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 +export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} +export DP_LOGS={{ analytics.home }}/logs/data-products +export KAFKA_HOME={{ analytics.soft_path }}/kafka_2.11-0.10.1.0 + +## job broker-list and kafka-topic +job_brokerList={{ brokerlist }} +job_topic={{ analytics_job_queue_topic }} + +## Job to run daily +cd {{ analytics.home }}/scripts +source model-config.sh +today=$(date "+%Y-%m-%d") + +if [ -z "$job_config" ]; then job_config=$(config $1); fi + +echo "Submitted $1 with config $job_config" >> "$DP_LOGS/$today-job-execution.log" +echo '{ "model" :' \"$1\" ',' ' "config": ' "$job_config" '}' >> "$DP_LOGS/$today-job-execution-debug.log" +echo '{ "model" :' \"$1\" ',' ' "config": ' "$job_config" '}' > /tmp/job-request.json +cat /tmp/job-request.json | $KAFKA_HOME/bin/kafka-console-producer.sh --broker-list $job_brokerList --topic $job_topic >> "$DP_LOGS/$today-job-execution.log" 2>&1 diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 new file mode 100644 index 0000000000..edd03ff36b --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 @@ -0,0 +1,216 @@ +#!/usr/bin/env bash + +## Job to run daily + +cd "{{ analytics_cluster.home }}" +source model-config.sh +today=$(date "+%Y-%m-%d") + +while :; do + case $1 in + -j|--job) shift + job="$1" + ;; + -m|--mode) shift + mode="$1" + ;; + -p|--parallelisation) shift + parallelisation=$1 + ;; + -pa|--partitions) shift + partitions=$1 + ;; + -sd|--startDate) shift + start_date=$1 + ;; + -ed|--endDate) shift + end_date=$1 + ;; + -h|--sparkMaster) shift + sparkMaster=$1 + ;; + -sp|--selectedPartitions) shift + selected_partitions=$1 + ;; + *) break + esac + shift +done + +get_report_job_model_name(){ + case "$1" in + "assessment-dashboard-metrics") echo 'org.sunbird.analytics.job.report.AssessmentMetricsJobV2' + ;; + "course-dashboard-metrics") echo 'org.sunbird.analytics.job.report.CourseMetricsJobV2' + ;; + "userinfo-exhaust") echo 'org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob' + ;; + "response-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJob' + ;; + "response-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2' + ;; + "progress-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJob' + ;; + "progress-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2' + ;; + "cassandra-migration") echo 'org.sunbird.analytics.updater.CassandraMigratorJob' + ;; + "uci-private-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob' + ;; + "uci-response-exhaust") echo 
'org.sunbird.analytics.exhaust.uci.UCIResponseExhaustJob' + ;; + *) echo $1 + ;; + esac +} + +submit_cluster_job() { + # add batch number to config + echo "Running for below batch number $i" + batchNumberString="\\\"modelParams\\\":{\\\"batchNumber\\\":$i," + job_config=$(config $job) + cluster_job_config=${job_config//'"'/'\"'} + finalConfig=${cluster_job_config/'\"modelParams\":{'/$batchNumberString} + echo $finalConfig + echo "Running $job as parallel jobs" + classVariable="org.ekstep.analytics.job.JobExecutor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" + argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" + clusterConfig=`cat cluster-config.json` + requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} + finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} + echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} + echo "Submitted job for batchNumer $i below is the response" + echo $response +} + +job_id=$(get_report_job_model_name $job) + +if [ -z "$sparkMaster" ]; then sparkMaster="local[*]"; else sparkMaster="$sparkMaster"; fi + +if [ "$mode" = "via-partition" ]; then + endPartitions=`expr $partitions - 1` + if [ -z "$parallelisation" ]; then parallelisation=1; else parallelisation=$parallelisation; fi + # add partitions to config and start jobs + for i in $(seq 0 $parallelisation $endPartitions) + do + # add partitions to config + partitionString="\\\"delta\\\":0,\\\"partitions\\\":[$(seq -s , $i `expr $i + $parallelisation - 1`)]" + if [ -z "$start_date" ]; then + job_config=$(config $job) + cluster_job_config=${job_config//'"'/'\"'} + finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} + echo $finalConfig + echo "Running $job by partitions." + classVariable="org.ekstep.analytics.job.JobExecutor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" + else + job_config=$(config $job '__endDate__') + cluster_job_config=${job_config//'"'/'\"'} + finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} + echo $finalConfig + echo "Running $job by partitions via Replay-Supervisor." 
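+        # Illustrative note, not part of the original script: with --partitions 8
+        # and --parallelisation 4, the outer loop runs for i=0 and i=4, and
+        # partitionString injects [0,1,2,3] and then [4,5,6,7] into the escaped
+        # config, so each submitted batch replays a disjoint slice of partitions.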
+ classVariable="org.ekstep.analytics.job.ReplaySupervisor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" + fi + argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" + clusterConfig=`cat cluster-config.json` + requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} + finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} + echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} + done + +elif [ "$mode" = "parallel-jobs" ]; then + # add batch number to config and submit jobs + echo "inside parallel-jobs block" + echo $parallelisation + if [ $parallelisation -ge 1 ]; then + for i in $(seq 1 $parallelisation) + do + submit_cluster_job $i & + done + else echo "No requests found in table"; fi + +elif [ "$mode" = "selected-partition" ]; then + # add partitions to config + partitionString="\\\"delta\\\":0,\\\"partitions\\\":[$selected_partitions]" + if [ -z "$start_date" ]; then + job_config=$(config $job) + cluster_job_config=${job_config//'"'/'\"'} + finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} + echo $finalConfig + echo "Running $job by partitions." + classVariable="org.ekstep.analytics.job.JobExecutor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" + else + job_config=$(config $job '__endDate__') + cluster_job_config=${job_config//'"'/'\"'} + finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} + echo $finalConfig + echo "Running $job by partitions via Replay-Supervisor." + classVariable="org.ekstep.analytics.job.ReplaySupervisor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" + fi + argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" + clusterConfig=`cat cluster-config.json` + requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} + finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} + echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} +else + if [ -z "$start_date" ]; then + echo "Running $job without partition via run-job." 
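+        # Sketch of the quoting step below (illustrative, assuming the config
+        # helper emits plain JSON): ${job_config//'"'/'\"'} escapes every double
+        # quote so the JSON can sit inside the request body, e.g.
+        #   {"search":{"type":"none"}}  becomes  {\"search\":{\"type\":\"none\"}}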
+ job_config=$(config $job) + cluster_job_config=${job_config//'"'/'\"'} + classVariable="org.ekstep.analytics.job.JobExecutor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$cluster_job_config\"]" + else + job_config=$(config $job '__endDate__') + cluster_job_config=${job_config//'"'/'\"'} + echo "Running $job without partition via Replay-Supervisor." + classVariable="org.ekstep.analytics.job.ReplaySupervisor" + argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$cluster_job_config\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" + fi + argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" + echo $argsStr + clusterConfig=`cat cluster-config.json` + requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} + finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} + echo $finalRequestBody +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} + +fi diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 new file mode 100644 index 0000000000..cfd986b008 --- /dev/null +++ b/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 @@ -0,0 +1,119 @@ +from __future__ import division +import math +import psycopg2 +import sys +import pandas as pd +from IPython.display import display +from psycopg2 import sql, connect +import json + + +def updateExhaustRequests(db, table, update_list): + for r in update_list: + cursor = db.cursor() + batchNum = r['batch_number'] + requestId = r['request_id'] + insertQry = "UPDATE {0} SET batch_number = {1} WHERE request_id = '{2}'".format(table, batchNum, requestId) + n = cursor.execute(insertQry) + +def updateDruidRequests(db, table, update_list): + for r in update_list: + cursor = db.cursor() + batchNum = r['batch_number'] + reportId = r['report_id'] + insertQry = "UPDATE {0} SET batch_number = {1} WHERE report_id = '{2}'".format(table, batchNum, reportId) + n = cursor.execute(insertQry) + +def processRequests(totalRequestsDf, jobId, batchSize, db, table,jobType): + # Compute parallelism from batchSize & totalRequests + # update batch_number to table + + totalRequests = len(totalRequestsDf.index) + print("totalRequests {0}".format(totalRequests)) + + parallelism = int(math.ceil(totalRequests/batchSize)) + print("parallelism computed {0}".format(parallelism)) + + if totalRequests > 0: + if jobType == 'exhaust': + totalRequestsDf["row_num"] = totalRequestsDf.groupby(by=['job_id'])['request_id'].transform(lambda x: x.rank()) + else: + totalRequestsDf["row_num"] = totalRequestsDf['report_id'].transform(lambda x: x.rank()) + #display(totalRequestsDf) + + start_index = 1 + end_index = batchSize + for i in range(1, parallelism+1): + subSetDf = totalRequestsDf[(totalRequestsDf['row_num'] >= start_index) & (totalRequestsDf['row_num'] <= end_index)] + subSetDf["batch_number"] 
= i + print(start_index,end_index) + if jobType == 'exhaust': + updateExhaustRequests(db, table, json.loads(subSetDf.to_json(orient='records'))) + else: + updateDruidRequests(db, table, json.loads(subSetDf.to_json(orient='records'))) + start_index = 1 + end_index + end_index = end_index + batchSize + db.commit() + db.close() + return parallelism + else: + return 0 + +def postgresql_to_dataframe(db, select_query, column_names): + cursor = db.cursor() + try: + cursor.execute(select_query) + except (Exception, psycopg2.DatabaseError) as error: + print("Error: %s" % error) + return 1 + + tupples = cursor.fetchall() + + df = pd.DataFrame(tupples, columns=column_names) + #display(df) + return df + +def get_columns_names(db,table): + columns = [] + col_cursor = db.cursor() + col_names_str = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS WHERE " + col_names_str += "table_name = '{}';".format( table ) + try: + sql_object = sql.SQL(col_names_str).format(sql.Identifier( table)) + col_cursor.execute( sql_object ) + col_names = (col_cursor.fetchall()) + for tup in col_names: + columns += [ tup[0] ] + col_cursor.close() + except Exception as err: + print ("get_columns_names ERROR:", err) + return columns + +def main(batchSize, jobId,jobType,table): + host="{{postgres.db_url}}" + port={{postgres.db_port}} + user="{{postgres.db_username}}" + password="{{postgres.db_password}}" + database="{{postgres.db_name}}" + url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database) + + db = psycopg2.connect(host=host, user=user, password=password, database=database, port=port) + + column_names = get_columns_names(db, table) + + if jobType == 'exhaust': + jobId = jobId.split("-v2")[0] if "-v2" in jobId else jobId + selectQuery = "select * from {0} where job_id = '{1}' and status IN ('SUBMITTED', 'FAILED') and iteration < 3;".format(table, jobId) + else: + selectQuery = "select * from {0} where status IN ('ACTIVE')".format(table) + df = postgresql_to_dataframe(db, selectQuery, column_names) + + parallelism = processRequests(df, jobId, batchSize, db, table,jobType) + return parallelism + +batchSize =int(sys.argv[2]) +jobId=sys.argv[1] +jobType = sys.argv[3] +table = sys.argv[4] +parallelism = main(batchSize, jobId,jobType,table) +print("returning parallelism value: {0}".format(parallelism)) diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml index 7eb22c7a18..690c51d87d 100755 --- a/ansible/roles/data-products-deploy/defaults/main.yml +++ b/ansible/roles/data-products-deploy/defaults/main.yml @@ -4,7 +4,7 @@ spark_output_temp_dir: /mount/data/analytics/tmp/ bucket: "telemetry-data-store" secor_bucket: "telemetry-data-store" -dp_object_store_type: "oci" +dp_object_store_type: "azure" dp_raw_telemetry_backup_location: "unique/raw/" dp_storage_key_config: "azure_storage_key" dp_storage_secret_config: "azure_storage_secret" @@ -210,9 +210,9 @@ admin_password: "{{ spark_cluster_user_password }}" spark_cluster_name: "{{env}}-spark-cluster" spark_cluster: - executor_core: 1 - executor_memory: 2G - num_executors: 1 + executor_core: 5 + executor_memory: 19G + num_executors: 5 analytics_cluster: home: "/tmp" diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 733c416138..c659f75113 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -6,14 +6,14 @@ - always - name: Ensure oci oss bucket exists - command: 
"oci os bucket get --name {{ bucket }}" + command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" register: check_bucket when: dp_object_store_type == "oci" tags: - always - name: Create oci oss bucket - command: "oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" when: dp_object_store_type == "oci" and check_bucket.rc !=0 tags: - always @@ -32,7 +32,7 @@ - dataproducts-spark-cluster - name: Copy Core Data Products to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -54,7 +54,7 @@ - ed-dataproducts-spark-cluster - name: Copy Ed Data Products to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -75,7 +75,7 @@ - framework-spark-cluster - name: Copy Framework Library to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -96,7 +96,7 @@ - framework-spark-cluster - name: Copy Scruid Library to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -154,7 +154,7 @@ - framework-spark-cluster - name: Copy configuration file to oci oss - command: oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force + command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force async: 3600 poll: 10 when: dp_object_store_type == "oci" From f77981144363726c9a7c278db6df00814fcbaefd Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 7 Apr 2023 15:50:32 +1000 Subject: [PATCH 087/203] reverted data-products-deploy role Signed-off-by: 
Deepak Devadathan --- .../templates/cluster-config.json.j2 | 30 +------------- .../templates/submit-script.j2 | 39 ++----------------- 2 files changed, 4 insertions(+), 65 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 12ebf0bde0..e899827fdb 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -27,7 +27,7 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } -{% elif (dp_object_store_type == "s3") %} +{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} { "jars": [ "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", @@ -55,32 +55,4 @@ "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } -{% elif (dp_object_store_type == "oci") %} -{ - "jars": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - ], - "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - "files": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" - ], - "className": "org.ekstep.analytics.job.JobExecutor", - "executorCores": {{ spark_cluster.executor_core }}, - "executorMemory": "{{ spark_cluster.executor_memory }}", - "numExecutors": {{ spark_cluster.num_executors }}, - "conf": { - "spark.sql.autoBroadcastJoinThreshold" : "-1", - 
"spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", - "spark.scheduler.mode" : "FAIR", - "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", - "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", - "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" - } -} {% endif %} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy/templates/submit-script.j2 b/ansible/roles/data-products-deploy/templates/submit-script.j2 index edd03ff36b..e8341dc1e8 100644 --- a/ansible/roles/data-products-deploy/templates/submit-script.j2 +++ b/ansible/roles/data-products-deploy/templates/submit-script.j2 @@ -1,7 +1,6 @@ #!/usr/bin/env bash ## Job to run daily - cd "{{ analytics_cluster.home }}" source model-config.sh today=$(date "+%Y-%m-%d") @@ -80,15 +79,7 @@ submit_cluster_job() { requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: 
application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}"
-}
-{% endif %}
+    response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}")
     echo "Submitted job for batchNumer $i below is the response"
     echo $response
 }
@@ -127,15 +118,7 @@ if [ "$mode" = "via-partition" ]; then
         requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
         finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
         echo $finalRequestBody
-{% if dp_object_store_type == "azure" %}
-{
 curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
-}
-{% elif (dp_object_store_type == "oci") %}
-{
-    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}"
-}
-{% endif %}
     done

 elif [ "$mode" = "parallel-jobs" ]; then
@@ -174,15 +157,8 @@ elif [ "$mode" = "selected-partition" ]; then
     requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
     finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
     echo $finalRequestBody
-{% if dp_object_store_type == "azure" %}
-{
 curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
-}
-{% elif (dp_object_store_type == "oci") %}
-{
-    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}"
-}
-{% endif %}
+
 else
     if [ -z "$start_date" ]; then
         echo "Running $job without partition via run-job." 
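A rough sketch, not part of this patch: each branch above posts the same request
body to Apache Livy's batches API and differs only in the gateway host (the Azure
HDInsight /livy/batches endpoint or the OCI BDS utility node on port 8998). With
placeholder host and credentials the call reduces to:

    curl -k -u "$ADMIN_USER:$ADMIN_PASSWORD" \
         -H "Content-Type: application/json" \
         -H "X-Requested-By: $ADMIN_USER" \
         -X POST -d "$finalRequestBody" \
         "https://<livy-host>/batches"
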
@@ -203,14 +179,5 @@ else
     requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
     finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
     echo $finalRequestBody
-{% if dp_object_store_type == "azure" %}
-{
-    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
-}
-{% elif (dp_object_store_type == "oci") %}
-{
-    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}"
-}
-{% endif %}
-
+    curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
 fi

From 230b286ee87f4ff72528b3c1a332cd1eb5168845 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan 
Date: Fri, 7 Apr 2023 17:55:00 +1000
Subject: [PATCH 088/203] removed unwanted env

Signed-off-by: Deepak Devadathan 
---
 pipelines/provision/spark/Jenkinsfile.bds | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds
index ae7e33e271..a4b47800fe 100644
--- a/pipelines/provision/spark/Jenkinsfile.bds
+++ b/pipelines/provision/spark/Jenkinsfile.bds
@@ -38,8 +38,6 @@ node('build-slave') {
             ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml"
             cd /tmp
             ./create-cluster.sh
-            export inventory_dir=/var/lib/jenkins/workspace/Provision/dev/DataPipeline/__SparkBDSCluster3/ansible/inventory/env
-            export ANSIBLE_HOST_KEY_CHECKING=False
             ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass
             '''

From 04bf9d4689c6bc124775d7dfafc1d670819fb817 Mon Sep 17 00:00:00 2001
From: nikesh_g_gogia 
Date: Fri, 7 Apr 2023 14:41:04 +0530
Subject: [PATCH 089/203] Removed Restart Cluster Logic - Done from Ambari

---
 .../templates/create-cluster-with-sleep.sh.j2 | 331 ------------------
 .../templates/create-cluster.sh.j2            |  16 +-
 .../templates/delete-cluster.sh.j2            |   1 +
 3 files changed, 3 insertions(+), 345 deletions(-)
 delete mode 100755 ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2

diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2
deleted file mode 100755
index 54784435d7..0000000000
--- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster-with-sleep.sh.j2
+++ /dev/null
@@ -1,331 +0,0 @@
-#! 
/bin/bash -# Subnet id will generate from env variable - -ambari_user="{{ambari_user}}" -cluster_password="{{cluster_password}}" -key_alias="{{key_alias}}" -user_id="{{user_id}}" -subnet="{{subnet_id}}" -compartment_id="{{compartment_id}}" -display_name="{{display_name}}" -workernode="{{workernode}}" -cluster_public_key="{{public_key}}" - -AMBARI_USER=$ambari_user -AMBARI_PWD=$cluster_password - -function get_bdsid() { - list_param=`oci bds instance list --compartment-id $compartment_id` - bdsid="NULL" - # echo $list_param | jq '.data' - state="ACTIVE" - disname="NULL" - for k in $(jq '.data | keys | .[]' <<< "$list_param"); do - # echo $k - cstate=`echo $list_param | jq -r '.data['$k']["lifecycle-state"]'` - if [ $cstate = $state ]; then - disname=`echo $list_param | jq -r '.data['$k']["display-name"]'` - if [ $disname = $display_name ]; then - bdsid=`echo $list_param | jq -r '.data['$k']["id"]'` - fi - - fi - echo "BDS ID" - echo $bdsid - done -} - -function getLivyip() { - - export bds_instance_id=$bdsid - bdsjson=$(oci bds instance get --bds-instance-id $bds_instance_id) - # echo "AMBARI URL" - ambari_url=`echo $bdsjson | jq -r '.data["cluster-details"]["ambari-url"]'` - # echo $ambari_url - livyip="NULL" - cnode="UTILITY" - for k in $(jq '.data["nodes"] | keys | .[]' <<< "$bdsjson"); do - node=`echo $bdsjson | jq -r '.data["nodes"]['$k']["node-type"]'` - if [ $node = "$cnode" ]; then - livyip=`echo $bdsjson | jq -r '.data["nodes"]['$k']["ip-address"]'` - fi - done - echo "LIVY IP" - echo $livyip - -} - -getlivyclustername() { - cdet=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$livyip:7183/api/v1/clusters/) - echo $cdet - for k in $(jq '.items | keys | .[]' <<< "$cdet"); do - # echo $k - cluster_name=`echo $cdet | jq -r '.items['$k']["Clusters"]["cluster_name"]'` - echo $cluster_name - done - echo "CLUSTER NAME" - -} - -function get_apidetails() { - - export bds_instance_id=$bdsid - - listapijson=$(oci bds bds-api-key list --bds-instance-id $bds_instance_id) - - #echo $listapijson | jq '.data[1]["key-alias"]' - id="NULL" - ctype="ACTIVE" - for k in $(jq '.data | keys | .[]' <<< "$listapijson"); do - type=`echo $listapijson | jq -r '.data['$k']["lifecycle-state"]'` - if [ $type = "$ctype" ]; then - id=`echo $listapijson | jq -r '.data['$k']["id"]'` - fi - done - - echo $id - - export api_key_id=$id - - list_api=`oci bds bds-api-key get --api-key-id $api_key_id --bds-instance-id $bds_instance_id` - - #echo $list_api | jq '.data' - - data=`echo $list_api | jq '.data'` - echo "API DETAILS" - echo $data - region=`echo $list_api | jq -r '.data["default-region"]'` - fingerprint=`echo $list_api | jq -r '.data["fingerprint"]'` - keyalias=`echo $list_api | jq -r '.data["key-alias"]'` - lifecyc=`echo $list_api | jq -r '.data["lifecycle-state"]'` - tm=`echo $list_api | jq -r '.data["time-created"]'` - usid=`echo $list_api | jq -r '.data["user-id"]'` - tenid=`echo $list_api | jq -r '.data["tenant-id"]'` - pemfilepath=`echo $list_api | jq -r '.data["pemfilepath"]'` - -} - -function update_bds_config(){ - #change below variables for your cluster - CONFIG_FILE_TO_UPDATE="" - - #Used when for restarting components after config update - #Wait time before we poll for restart status. Default 30 seconds. Meaning, We poll for restart status every 30 seconds - WAIT_TIME_IN_SEC=30 - - #No of tries before we give up on the restart status. Default 20. With default WAIT_TIME_IN_SEC as 30, At max we wait for 10(20*30=600 seconds) minutes before we give up. 
- RETRY_COUNT=20 - - #INTERNAL USE ONLY - propObj="" - - get_apidetails - getUtilityNodesIps=$livyip - getlivyclustername - echo $getUtilityNodesIps - getClusterName=$cluster_name - for utilityNodeIp in $getUtilityNodesIps - do - echo "Current utility node ip: $utilityNodeIp" - str1=$(nslookup $utilityNodeIp | awk -v var=$utilityNodeIp '/name =/{print var "\t", $4}') - CONFIG_FILE_TO_UPDATE="core-site" #this is the file we're updating in this example - propObj=$(get_property_json) - echo $propObj - echo "calling add properties" - - #update key value pairs. Multiple key value pairs can be updated before doing update_ambari_config - add_properties "fs.oci.client.auth.fingerprint" $fingerprint - add_properties "fs.oci.client.auth.passphrase" $passphrase - add_properties "fs.oci.client.auth.pemfilepath" $pemfilepath - add_properties "fs.oci.client.auth.tenantId" $tenid - add_properties "fs.oci.client.auth.userId" $usid - add_properties "fs.oci.client.regionCodeOrId" $region - #Update it to ambari - echo "updating ambari config" - update_ambari_config - - # echo "restarting all required components" - # restart_required_components - - done - -} - - -#Method to collect the current config -function get_property_json(){ - allConfs=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName?fields=Clusters/desired_configs) #to get all the configs - currVersionLoc=".Clusters.desired_configs.\"$CONFIG_FILE_TO_UPDATE\".tag" #fetching current version for property - propVersion=$(echo $allConfs | jq $currVersionLoc | tr -d '"') - propJson=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X GET "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/configurations?type=$CONFIG_FILE_TO_UPDATE&tag=$propVersion") #fetch property json - propLoc=".items[].properties" - propKeyVal=$(echo $propJson | jq $propLoc) - propObj="{\"properties\":$propKeyVal}" - echo $propObj -} - -#Method to add/update key value pair to existing config -function add_properties(){ - echo $1 $2 - echo $propObj - propObj=$(echo $propObj | jq '.properties += { "'$1'": "'$2'" }') - echo $propObj -} - -#Method to update config in ambari -function update_ambari_config(){ - parseableAddedProp=$(echo $propObj | jq '.properties') - echo $parseableAddedProp - timestamp=$(date +%s) - newVersion="version$timestamp" - finalJson='[{"Clusters":{"desired_config":[{"type":"'$CONFIG_FILE_TO_UPDATE'","tag":"'$newVersion'","properties":'$parseableAddedProp'}]}}]' - echo "CALING AMABRI API" - response_body_amb=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X PUT -d "$finalJson" "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName") - echo $response_body_amb - echo "DONE AMABRI API" -} - -#Method to restart required components -function restart_required_components(){ - echo "restarting all required components" - response_body=$(curl -u $AMBARI_USER:$AMBARI_PWD -H "X-Requested-By: ambari" -k -X POST -d '{"RequestInfo":{"command":"RESTART","context":"Restart all required services from bootstrap script","operation_level":"host_component"},"Requests/resource_filters":[{"hosts_predicate":"HostRoles/stale_configs=true&HostRoles/cluster_name='$getClusterName'"}]}' "https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests") - - echo "printing response_body: $response_body" - - idLoc=".Requests.id" - requestId=$(echo $response_body | jq $idLoc) - echo "request id is : $requestId" - - current_count=0 - while [[ $current_count -lt $RETRY_COUNT ]]; - do - 
current_count=$((current_count+1)) - response=$(curl -v -u $AMBARI_USER:$AMBARI_PWD -k -X GET https://$utilityNodeIp:7183/api/v1/clusters/$getClusterName/requests/$requestId) - request_status=$(echo $response | jq -r ".Requests.request_status") - echo "printing request_status: $request_status" - if [[ $request_status == "IN_PROGRESS" ]] || [[ $request_status == "PENDING" ]]; then - echo "current_count is : $current_count" - sleep $WAIT_TIME_IN_SEC - elif [[ $request_status == "COMPLETED" ]]; then - echo "Restart successful" - break - fi - done -} - -function creat_api(){ - export bds_instance_id=$bdsid - export key_alias=$key_alias # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-key-alias - export passphrase=$b64p # https://docs.cloud.oracle.com/en-us/iaas/tools/oci-cli/latest/oci_cli_docs/cmdref/bds/bds-api-key/create.html#cmdoption-passphrase - export user_id=$user_id - oci bds bds-api-key create --bds-instance-id $bds_instance_id --key-alias $key_alias --passphrase $passphrase --user-id $user_id -} - -function restart_bds_cluster() { - # oci cli command to stop - echo "STOPPING CLUSTER" - oci bds instance stop --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --is-force-stop-jobs true - sleep 10m - # oci cli command to start - echo "STARTING CLUSTER" - oci bds instance start --bds-instance-id $bds_instance_id --cluster-admin-password $b64p - sleep 15m -} - - -# Below is tenancy - -function create_cluster() { - - export compartment_id=$compartment_id - - master=1 - utility=1 - - worker=$workernode # This has to be replaced with Jenkins Paramter - - # Begin script in case all parameters are correct - echo "Generating json woth $master master ndoes $utility utility nodes and $worker worker nodes" - json="[" - - for i in `seq 1 $master` - do - json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" - done - - for i in `seq 1 $utility` - do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" - done - - for i in `seq 1 $worker` - do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 16, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" - done - - json="$json]" - printf "$json" > "nodes.json" - echo "File successfully generated and saved as nodes.json" - - echo "CREATING THE BDS CLUSTER" - - export cluster_public_key=$public_key - export cluster_version="ODH2_0" - export display_name=$display_name - export is_high_availability='false' - export is_secure='false' - - cmd="oci bds instance create --cluster-admin-password '$b64p' --cluster-public-key '$cluster_public_key' --cluster-version '$cluster_version' --compartment-id '$compartment_id' --display-name '$display_name' --is-high-availability $is_high_availability --is-secure $is_secure --nodes file://nodes.json " - echo $cmd - eval "$cmd" - -} - -function replace_host() { - echo "REPLACE THE HOSTS" - echo "" >> {{inventory_dir}}/hosts - echo "[bds-livy-node]" >> {{inventory_dir}}/hosts - echo "$livyip ansible_ssh_user=opc" >> {{inventory_dir}}/hosts - echo "" >> {{inventory_dir}}/hosts - -} - -# MAIN TO START - -b64p=`echo -n $cluster_password | base64` -echo $b64p -echo $compartment_id - -echo "CREATING 
CLUSTER" - -create_cluster - -echo "WAITING CLUSTER TO CREATE" - -sleep 42m - -echo "FETCHING BDS ID" - -get_bdsid # This sets BDS ID - -echo "GET LIVY-AMBARI IP" - -getLivyip # This will be ambari ip also - -replace_host - -echo "CREATE OBJECT STORAGE API KEY" - -creat_api - -echo "WAITING FOR API TO CREATE" - -sleep 5m - -echo "UPDATE BDS AMBARI CONFIG" - -get_apidetails - -update_bds_config - -restart_bds_cluster \ No newline at end of file diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index e4295dded8..541a1ec561 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -1,6 +1,7 @@ #! /bin/bash # Subnet id will generate from env variable -# Version 1 Running Fine +# Version 1 Author Nikesh Gogia and Ali Shemshadi + ambari_user="{{ambari_user}}" cluster_password="{{cluster_password}}" key_alias="{{key_alias}}" @@ -227,18 +228,6 @@ function create_api(){ oci bds bds-api-key create --bds-instance-id $bds_instance_id --key-alias $key_alias --passphrase $passphrase --user-id $user_id --wait-for-state $capi --max-wait-seconds $cwait } -function restart_bds_cluster() { - # oci cli command to stop - echo "STOPPING CLUSTER" - cstate='SUCCEEDED' - cwait=2000 - oci bds instance stop --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --is-force-stop-jobs true --wait-for-state=$cstate --max-wait-seconds $cwait - # oci cli command to start - echo "STARTING CLUSTER" - cstate='SUCCEEDED' - oci bds instance start --bds-instance-id $bds_instance_id --cluster-admin-password $b64p --wait-for-state=$cstate --max-wait-seconds $cwait -} - # Below is tenancy @@ -326,4 +315,3 @@ get_apidetails update_bds_config -# restart_bds_cluster diff --git a/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 index b5e1d28d36..3e3df78b33 100755 --- a/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/delete-cluster.sh.j2 @@ -1,4 +1,5 @@ #!/bin/bash +# Version 1 - Author Nikesh Gogia nikesh.g.gogia@oracle.com compartment_id="{{compartment_id}}" display_name="{{display_name}}" From 6604a0bd3869026aa9c408d2aced792e21201ffe Mon Sep 17 00:00:00 2001 From: nikesh_g_gogia Date: Sat, 8 Apr 2023 12:01:34 +0530 Subject: [PATCH 090/203] Handled Ambari Restart logic Signed-off-by: nikesh_g_gogia --- .../roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index 541a1ec561..94479d0764 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -315,3 +315,5 @@ get_apidetails update_bds_config +echo "BDS Config Completed and Ambari Restarted" + From 8e1f04aad8377218c980fde50692a9f2cadd6d1d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 06:58:13 +1000 Subject: [PATCH 091/203] updated jenkins.bds to consume credential from credstore Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 30 ++++++++++++----------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds 
b/pipelines/provision/spark/Jenkinsfile.bds index a4b47800fe..73de054790 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -14,13 +14,14 @@ node('build-slave') { } stage('copy cluster creation script') { + withCredentials([usernamePassword(credentialsId: 'oci-bds-credential', passwordVariable: 'cluster_password', usernameVariable: 'ambari_user')]) { values = [:] envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim() module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=$ambari_user cluster_password=$cluster_password key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) @@ -29,21 +30,22 @@ node('build-slave') { values.put('ansibleExtraArgs', ansibleExtraArgs) println values ansible_playbook_run(values) + } } - stage('create and provision spark OCI BDS') { - oci_namespace=params.oci_namespace - //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { - sh ''' - currentws=$(pwd) - ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" - cd /tmp - ./create-cluster.sh - export ANSIBLE_HOST_KEY_CHECKING=False - ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass - ''' - //} + // stage('create and provision spark OCI BDS') { + // oci_namespace=params.oci_namespace + // //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { + // sh ''' + // currentws=$(pwd) + // ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + // cd /tmp + // ./create-cluster.sh + // export ANSIBLE_HOST_KEY_CHECKING=False + // ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + // ''' + // //} - } + // } } } From 00eef6909a19bf1c3c2064f0a238f35c7b10aa9d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 07:05:44 +1000 Subject: [PATCH 092/203] parametrized create cluster Signed-off-by: Deepak Devadathan --- .../oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 4 ++-- pipelines/provision/spark/Jenkinsfile.bds | 3 +-- 
2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index 94479d0764..2e880a92d2 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -2,8 +2,8 @@ # Subnet id will generate from env variable # Version 1 Author Nikesh Gogia and Ali Shemshadi -ambari_user="{{ambari_user}}" -cluster_password="{{cluster_password}}" +ambari_user="${1}" +cluster_password="${2}" key_alias="{{key_alias}}" user_id="{{user_id}}" subnet="{{subnet_id}}" diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index 73de054790..d7ada30bab 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -14,7 +14,6 @@ node('build-slave') { } stage('copy cluster creation script') { - withCredentials([usernamePassword(credentialsId: 'oci-bds-credential', passwordVariable: 'cluster_password', usernameVariable: 'ambari_user')]) { values = [:] envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim() module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() @@ -30,7 +29,7 @@ node('build-slave') { values.put('ansibleExtraArgs', ansibleExtraArgs) println values ansible_playbook_run(values) - } + } // stage('create and provision spark OCI BDS') { // oci_namespace=params.oci_namespace From 6c76fbf4aaa16ebc6f5fa41eaeae8fc9a6ec961f Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 07:06:37 +1000 Subject: [PATCH 093/203] corrected syntax Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index d7ada30bab..5d0f175308 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -20,7 +20,7 @@ node('build-slave') { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=$ambari_user cluster_password=$cluster_password key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From cc12811533a81a733a1bbfc0fc11a3f4d7ecc47d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 07:19:00 +1000 Subject: [PATCH 094/203] disabled delete playbook for testing Signed-off-by: Deepak Devadathan --- 
 pipelines/provision/spark/Jenkinsfile.delete | 24 ++++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/pipelines/provision/spark/Jenkinsfile.delete b/pipelines/provision/spark/Jenkinsfile.delete
index 5675a7e1df..ff5a9c5384 100644
--- a/pipelines/provision/spark/Jenkinsfile.delete
+++ b/pipelines/provision/spark/Jenkinsfile.delete
@@ -20,7 +20,7 @@ node('build-slave') {
             jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim()
             currentWs = sh(returnStdout: true, script: 'pwd').trim()
             ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml"
-            ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass"
+            ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass"
             values.put('currentWs', currentWs)
             values.put('env', envDir)
             values.put('module', module)
@@ -30,18 +30,18 @@ node('build-slave') {
             println values
             ansible_playbook_run(values)
         }
 
-        stage('create and provision spark OCI BDS') {
-            storage_container=params.storage_container
-            //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) {
-            sh '''
-            currentws=$(pwd)
-            ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml"
-            cd /tmp
-            ./delete-cluster.sh
-            '''
-            //}
+        // stage('create and provision spark OCI BDS') {
+        //     storage_container=params.storage_container
+        //     //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) {
+        //     sh '''
+        //     currentws=$(pwd)
+        //     ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml"
+        //     cd /tmp
+        //     ./delete-cluster.sh
+        //     '''
+        //     //}
 
-        }
+        // }
     }
 }
From 15832a59bf345017d8af4250c670d8b3010e62fc Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 10 Apr 2023 07:21:33 +1000
Subject: [PATCH 095/203] removed redundant workernode parameter

Signed-off-by: Deepak Devadathan
---
 pipelines/provision/spark/Jenkinsfile.delete | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/provision/spark/Jenkinsfile.delete b/pipelines/provision/spark/Jenkinsfile.delete
index ff5a9c5384..a469192dc8 100644
--- a/pipelines/provision/spark/Jenkinsfile.delete
+++ b/pipelines/provision/spark/Jenkinsfile.delete
@@ -20,7 +20,7 @@ node('build-slave') {
             jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim()
             currentWs = sh(returnStdout: true, script: 'pwd').trim()
             ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml"
-            ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} 
cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From 21b8166b3ac404cdb957ec05fd4034dc12d3374a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 07:30:33 +1000 Subject: [PATCH 096/203] testing create spark cluster Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 26 +++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index 5d0f175308..3deb57cc6a 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -31,20 +31,20 @@ node('build-slave') { ansible_playbook_run(values) } - // stage('create and provision spark OCI BDS') { - // oci_namespace=params.oci_namespace - // //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { - // sh ''' - // currentws=$(pwd) - // ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" - // cd /tmp - // ./create-cluster.sh - // export ANSIBLE_HOST_KEY_CHECKING=False - // ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass - // ''' - // //} + stage('create and provision spark OCI BDS') { + oci_namespace=params.oci_namespace + withCredentials([usernamePassword(credentialsId: 'oci-bds-credential', passwordVariable: 'cluster_password', usernameVariable: 'ambari_user')]) { + sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + cd /tmp + ./create-cluster.sh $ambari_user $cluster_password + export ANSIBLE_HOST_KEY_CHECKING=False + ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + ''' + } - // } + } } } From 15832a59bf345017d8af4250c670d8b3010e62fc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 08:32:48 +1000 Subject: [PATCH 097/203] updated delete cluster jenkins Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.delete | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pipelines/provision/spark/Jenkinsfile.delete b/pipelines/provision/spark/Jenkinsfile.delete index a469192dc8..ce5e0b19dd 100644 --- a/pipelines/provision/spark/Jenkinsfile.delete +++ b/pipelines/provision/spark/Jenkinsfile.delete @@ -30,18 +30,18 @@ node('build-slave') { println values ansible_playbook_run(values) } - // stage('create and provision spark OCI BDS') { - // storage_container=params.storage_container - // //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { - // sh ''' - // currentws=$(pwd) - // 
ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" - // cd /tmp - // ./delete-cluster.sh - // ''' - // //} + stage('create and provision spark OCI BDS') { + storage_container=params.storage_container + //withCredentials([usernamePassword(credentialsId: 'azure-service-principal', passwordVariable: 'sppass', usernameVariable: 'spuser')]) { + sh ''' + currentws=$(pwd) + ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" + cd /tmp + ./delete-cluster.sh + ''' + //} - // } + } } } From 20ca43d3132d0f655a26db1481961afbe1ee3c6d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 10:49:52 +1000 Subject: [PATCH 098/203] updated the spark deploy job for bds Signed-off-by: Deepak Devadathan --- .../data-products-deploy/defaults/main.yml | 1 + .../roles/data-products-deploy/tasks/main.yml | 14 +++---- .../templates/cluster-config.json.j2 | 30 +++++++++++++- .../templates/submit-script.j2 | 39 +++++++++++++++++-- ansible/spark-cluster-job-submit.yml | 1 + .../spark-cluster-deploy/Jenkinsfile.parallel | 2 +- 6 files changed, 75 insertions(+), 12 deletions(-) diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml index 690c51d87d..38ddd70420 100755 --- a/ansible/roles/data-products-deploy/defaults/main.yml +++ b/ansible/roles/data-products-deploy/defaults/main.yml @@ -1,6 +1,7 @@ analytics_user: analytics analytics_group: analytics spark_output_temp_dir: /mount/data/analytics/tmp/ +oci_install_loc: /home/{{analytics_user}}/bin/ bucket: "telemetry-data-store" secor_bucket: "telemetry-data-store" diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index c659f75113..ee6b40c1cb 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -6,14 +6,14 @@ - always - name: Ensure oci oss bucket exists - command: "/home/{{analytics_user}}/bin/oci os bucket get --name {{ bucket }}" + command: "{{oci_install_loc}}/oci os bucket get --name {{ bucket }}" register: check_bucket when: dp_object_store_type == "oci" tags: - always - name: Create oci oss bucket - command: "/home/{{analytics_user}}/bin/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" + command: "{{oci_install_loc}}/oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" when: dp_object_store_type == "oci" and check_bucket.rc !=0 tags: - always @@ -32,7 +32,7 @@ - dataproducts-spark-cluster - name: Copy Core Data Products to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force + command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force" async: 3600 poll: 10 when: dp_object_store_type == "oci" @@ -54,7 +54,7 @@ - ed-dataproducts-spark-cluster - name: Copy Ed Data Products to oci oss - command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force + command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version 
}}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force"
   async: 3600
   poll: 10
   when: dp_object_store_type == "oci"
@@ -75,7 +75,7 @@
     - framework-spark-cluster
 
 - name: Copy Framework Library to oci oss
-  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force
+  command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force"
   async: 3600
   poll: 10
   when: dp_object_store_type == "oci"
@@ -96,7 +96,7 @@
     - framework-spark-cluster
 
 - name: Copy Scruid Library to oci oss
-  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force
+  command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force"
   async: 3600
   poll: 10
   when: dp_object_store_type == "oci"
@@ -154,7 +154,7 @@
     - framework-spark-cluster
 
 - name: Copy configuration file to oci oss
-  command: /home/{{analytics_user}}/bin/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force
+  command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force"
   async: 3600
   poll: 10
   when: dp_object_store_type == "oci"
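# A minimal manual equivalent of the upload tasks above, assuming the role
# defaults visible in this patch (bucket "telemetry-data-store", model_version
# 2.0, oci_install_loc /home/analytics/bin); `--force` makes `oci os object put`
# overwrite an existing object instead of failing on a name collision:
#
#   /home/analytics/bin/oci os object put -bn telemetry-data-store \
#     --name models-2.0/application.conf \
#     --file /mount/data/analytics/models-2.0/application.conf --force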
diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index e899827fdb..12ebf0bde0 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -27,7 +27,7 @@
     "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
   }
 }
-{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %}
+{% elif (dp_object_store_type == "s3") %}
 {
   "jars": [
     "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}",
@@ -55,4 +55,32 @@
     "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
   }
 }
+{% elif (dp_object_store_type == "oci") %}
+{
+  "jars": [
+    "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}",
+    "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}",
+    "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}"
+  ],
+  "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}",
+  "files": [
+    "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf"
+  ],
+  "className": "org.ekstep.analytics.job.JobExecutor",
+  "executorCores": {{ spark_cluster.executor_core }},
+  "executorMemory": "{{ spark_cluster.executor_memory }}",
+  "numExecutors": {{ spark_cluster.num_executors }},
+  "conf": {
+    "spark.sql.autoBroadcastJoinThreshold" : "-1",
+    "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}",
+    "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}",
+    "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}",
+    "spark.scheduler.mode" : "FAIR",
+    "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
+    "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
+    "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
+    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}",
+    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
+  }
+}
 {% endif %}
\ No newline at end of file
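# The rendered cluster-config.json above becomes the request body for a Livy
# batch submission (see the submit-script.j2 changes below). A hand-built
# sketch of that submission against an OCI BDS Livy endpoint, with placeholder
# host and credentials:
#
#   curl -k --user "admin:secret" -H "Content-Type: application/json" \
#     -H "X-Requested-By: admin" \
#     -X POST -d @cluster-config.json "https://<livy-host>:8998/batches"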
diff --git a/ansible/roles/data-products-deploy/templates/submit-script.j2 b/ansible/roles/data-products-deploy/templates/submit-script.j2
index e8341dc1e8..0e629a4ce8 100644
--- a/ansible/roles/data-products-deploy/templates/submit-script.j2
+++ b/ansible/roles/data-products-deploy/templates/submit-script.j2
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 ## Job to run daily
+
 
 cd "{{ analytics_cluster.home }}"
 source model-config.sh
 today=$(date "+%Y-%m-%d")
@@ -79,7 +80,15 @@ submit_cluster_job() {
     requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
     finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
     echo $finalRequestBody
-    response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: admin_name }}")
+{% if dp_object_store_type == "azure" %}
+{
+    response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}")
+}
+{% elif (dp_object_store_type == "oci") %}
+{
+    response=$(curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}")
+}
+{% endif %}
     echo "Submitted job for batchNumber $i below is the response"
     echo $response
 }
@@ -118,7 +127,15 @@ if [ "$mode" = "via-partition" ]; then
         requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
         finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
         echo $finalRequestBody
+{% if dp_object_store_type == "azure" %}
+{
         curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}"
+}
+{% elif (dp_object_store_type == "oci") %}
+{
+        curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}"
+}
+{% endif %}
     done
 
 elif [ "$mode" = "parallel-jobs" ]; then
@@ -157,8 +174,15 @@ elif [ "$mode" = "selected-partition" ]; then
         requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr}
         finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable}
         echo $finalRequestBody
+{% if dp_object_store_type == "azure" %}
+{
         curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name 
}}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" - +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} else if [ -z "$start_date" ]; then echo "Running $job without partition via run-job." @@ -179,5 +203,14 @@ else requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} echo $finalRequestBody - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +{% if dp_object_store_type == "azure" %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% elif (dp_object_store_type == "oci") %} +{ + curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_bds_url }}:8998/batches' -H "X-Requested-By: {{ admin_name }}" +} +{% endif %} + fi diff --git a/ansible/spark-cluster-job-submit.yml b/ansible/spark-cluster-job-submit.yml index ba4e017a23..8924fce8f2 100644 --- a/ansible/spark-cluster-job-submit.yml +++ b/ansible/spark-cluster-job-submit.yml @@ -6,6 +6,7 @@ environment: AZURE_STORAGE_ACCOUNT: "{{sunbird_private_storage_account_name}}" AZURE_STORAGE_KEY: "{{sunbird_private_storage_account_key}}" + OCI_CLI_AUTH: instance_principal roles: - data-products-deploy diff --git a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel index 4b9891d62a..dad65d4e73 100644 --- a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel +++ b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel @@ -26,7 +26,7 @@ node() { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/spark-cluster-job-submit.yml" - ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type} -vvvv " + ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} oci_install_loc=${params.oci_install_loc} batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type} -vvvv " values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From e278a689c4b87ca70952ca8be2813f5ae7ec0428 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 14:28:47 +1000 Subject: [PATCH 099/203] removed redundant role Signed-off-by: Deepak Devadathan --- .../defaults/main.yml | 282 -------- .../collection-summary-ingestion-spec.json | 251 ------- .../files/sourcing-ingestion-spec.json | 146 ---- .../tasks/main.yml | 499 ------------- .../templates/cluster-config.json.j2 | 86 --- .../templates/common.conf.j2 | 317 --------- .../templates/exhaust_sanity_check.py.j2 | 58 -- .../templates/log4j2.xml.j2 | 54 
-- .../templates/model-config.j2 | 151 ---- .../templates/model-config.json.j2 | 670 ------------------ .../templates/model-dock-config.j2 | 34 - .../templates/replay-job.j2 | 63 -- .../templates/replay-updater.j2 | 24 - .../templates/replay-utils.j2 | 43 -- .../templates/run-dock-job.j2 | 41 -- .../templates/run-job.j2 | 83 --- .../templates/start-jobmanager.j2 | 46 -- .../templates/submit-all-jobs.rb.j2 | 58 -- .../templates/submit-job.j2 | 22 - .../templates/submit-script.j2 | 216 ------ .../templates/update-job-requests.py.j2 | 119 ---- 21 files changed, 3263 deletions(-) delete mode 100755 ansible/roles/data-products-deploy-oci-bds/defaults/main.yml delete mode 100644 ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json delete mode 100644 ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json delete mode 100644 ansible/roles/data-products-deploy-oci-bds/tasks/main.yml delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 delete mode 100644 ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 diff --git a/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml b/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml deleted file mode 100755 index 7eb22c7a18..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/defaults/main.yml +++ /dev/null @@ -1,282 +0,0 @@ -analytics_user: analytics -analytics_group: analytics -spark_output_temp_dir: /mount/data/analytics/tmp/ - -bucket: "telemetry-data-store" -secor_bucket: "telemetry-data-store" -dp_object_store_type: "oci" -dp_raw_telemetry_backup_location: "unique/raw/" -dp_storage_key_config: "azure_storage_key" -dp_storage_secret_config: "azure_storage_secret" -dp_reports_storage_key_config: "reports_azure_storage_key" -dp_reports_storage_secret_config: "reports_azure_storage_secret" - -kafka_broker_host: "{{groups['processing-cluster-kafka'][0]}}:9092" -ingestion_kafka_broker_host: "{{groups['ingestion-cluster-kafka'][0]}}:9092" -brokerlist: "{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" -zookeeper: "{{groups['processing-cluster-zookeepers']|join(':2181,')}}:2181" 
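-# For illustration (assumed inventory values, not from this repo): with
-# processing-cluster-kafka hosts 10.0.0.11 and 10.0.0.12, the join filter
-# above renders brokerlist as "10.0.0.11:9092,10.0.0.12:9092"; the zookeeper
-# value is built the same way with port 2181.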
-dp_username: dp-monitor -analytics_job_queue_topic: "{{ env }}.analytics.job_queue" -topic: "{{ env }}.telemetry.derived" -analytics_metrics_topic: "{{ env }}.analytics_metrics" -sink_topic: "{{ env }}.telemetry.sink" -assess_topic: "{{ env }}.telemetry.assess" -metrics_topic: "{{ env }}.telemetry.metrics" -job_manager_tmp_dir: "transient-data" -channel: dev-test -druid_broker_host: "{{groups['raw-broker'][0]}}" -druid_router_host: "{{groups['raw-router'][0]}}" -druid_rollup_broker_host: "{{groups['raw-broker'][0]}}" -hierarchySearchServiceUrl: "{{ proto }}://{{ domain_name }}/action/content" -hierarchySearchServicEndpoint: /v3/hierarchy/ - -user_table_keyspace: "sunbird" -course_keyspace: "sunbird_courses" -hierarchy_store_keyspace: "{{ env }}_hierarchy_store" -job_request_table: "{{ env }}_job_request" -dataset_metadata_table: "{{ env }}_dataset_metadata" -report_user_table_keyspace: "sunbird_courses" -report_user_enrolment_table: "report_user_enrolments" - -analytics_job_list: '"wfs", "content-rating-updater", "monitor-job-summ"' -analytics_jobs_count: 3 - -cassandra_keyspace_prefix: '{{ env }}_' -report_cassandra_cluster_host: "{{ report_cassandra_host | default(core_cassandra_host) }}" -cassandra_hierarchy_store_keyspace: "{{ env_name}}_hierarchy_store" -spark_version: 3.1.3 - -heap_memory: "-Xmx5120m" - -spark: - home: "{{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7" - public_dns: 54.255.154.146 - master: - url: spark://172.31.11.117:7077 - host: 172.31.11.117 - worker: - instances: 1 - cores: 2 - memory: 4g - driver: - memory: 3g - executor: - memory: 4g - driver_memory: 7g - memory_fraction: 0.3 - storage_fraction: 0.5 - executor_memory: 2g - heap_conf_str: '"-XX:+UseG1GC -XX:MaxGCPauseMillis=100 -Xms250m {{ heap_memory }} -XX:+UseStringDeduplication"' - -submit_jobs: - submit-all-jobs: - hour: 02 - minute: 35 - -start_jobmanager: - job-manager: - hour: 02 - minute: 30 -have_weekly_jobs: false - -course_batch_status_updater_job_schedule: 60 - -run_wfs_job: - wfs: - hour: 00 - minute: 30 -run_monitor_job: - monitor-job-summ: - hour: 03 - minute: 00 - -run_admin_user_reports_job: - admin-user-reports-3AMIST: - hour: 21 - minute: 30 - admin-user-reports-2PMIST: - hour: 8 - minute: 30 - -run_admin_geo_reports_job: - admin-geo-reports-4AMIST: - hour: 22 - minute: 30 - admin-geo-reports-3PMIST: - hour: 9 - minute: 30 - -run_assessment_aggregator_report_job: - assessment-aggregator-report: - hour: 18 - minute: 35 - -update_user_redis_cache: - populate-user-cache: - hour: 3 - minute: 00 - -index_content_model_druid: - index_content: - hour: 1 - minute: 00 - -run_etb_metrics_weekly_job: - etb-metrics-weekly: - hour: 23 - minute: 30 - weekday: 1 - -# These are the dummy times till sept30 for exhaust reports -#To-Do: Update time after 3.2.7 deployment - -run_progress_exhaust: - progress-exhaust: - hour: 08 - minute: 00 - -run_response_exhaust: - response-exhaust: - hour: 09 - minute: 00 - -run_userinfo_exhaust: - userinfo-exhaust: - hour: 10 - minute: 00 - -run_collection_summary: - collection-summary: - hour: 09 - minute: 30 - -run_sourcing_summary: - sourcing-summary: - hour: 10 - minute: 30 - -run_cassandra_migration: - cassandra-migration: - hour: 19 - minute: 15 - -run_uci_private_exhaust_job: - uci-private-exhaust: - hour: 03 - minute: 00 - -run_uci_response_exhaust_job: - uci-response-exhaust: - hour: 02 - minute: 00 - - -service: - search: - url: http://{{private_ingressgateway_ip}}/search - path: /v3/search - -es_search_index: "compositesearch" -analytics: - home: 
/mount/data/analytics - soft_path: /mount/data/analytics - paths: ['/mount/data/analytics', '/mount/data/analytics/logs', '/mount/data/analytics/logs/services', '/mount/data/analytics/logs/data-products', '/mount/data/analytics/tmp', '/mount/data/analytics/scripts', '/mount/data/analytics/models' ] - scripts: ['model-config', 'replay-job', 'replay-updater', 'replay-utils', 'run-job', 'submit-job', 'start-jobmanager', 'submit-script'] - dockScripts: ['model-dock-config','run-dock-job'] - -# artifact versions -analytics_core_artifact_ver: "2.0" -analytics_ed_dataporducts_artifact_ver: "1.0" -scruid_artifact_ver: "2.5.0" - -producer_env: "dev.sunbird" -analytics_job_manager_artifact: "job-manager-{{ analytics_core_artifact_ver }}.jar" -analytics_core_artifact: "analytics-framework-{{ analytics_core_artifact_ver }}.jar" -scruid_artifact: "scruid_2.12-{{ scruid_artifact_ver }}.jar" -analytics_batch_module_artifact: "batch-models-{{ analytics_core_artifact_ver }}.jar" -analytics_ed_dataporducts_artifact: "data-products-{{ analytics_ed_dataporducts_artifact_ver }}-distribution.tar.gz" -model_version: "2.0" - -submit_jobs_auth_token: "{{ sunbird_api_auth_token }}" -report_list_jobs_url: "{{ druid_report_url }}" - -reports_container: "reports" - -# Cluster vars -spark_cluster_user_password: "" -spark_cluster_user_name: "" -admin_name: "{{ spark_cluster_user_name }}" -admin_password: "{{ spark_cluster_user_password }}" -spark_cluster_name: "{{env}}-spark-cluster" - -spark_cluster: - executor_core: 1 - executor_memory: 2G - num_executors: 1 - -analytics_cluster: - home: "/tmp" - -analytics_ed_dataporducts_jar_artifact: "data-products-{{ analytics_ed_dataporducts_artifact_ver }}.jar" - -spark_enable_dynamic_allocation: false -# Spark Cassandra config-vars -spark_cassandra_connection_timeout_millis: 30000 -spark_cassandra_query_timeout_millis: 180000 -spark_cassandra_query_max_rows_fetch_count: 1000 -spark_sql_shuffle_partitions: 200 - -druid_report_postgres_db_name: druid -druid_report_postgres_db_username: druid - - -#Override this variable in production and point to druid rollup ingestion cluster -# Example: "http://$rollup_cluster_ip:8090" -druid_rollup_cluster_ingestion_task_url: "http://{{groups['raw-overlord'][0]}}:8081" - -# On demand Exhaust throttling vars -exhaust_batches_limit_per_channel: 30 -exhaust_file_size_limit_bytes_per_channel: 1073741824 - -exhaust_parallel_batch_load_limit: 10 -exhaust_user_parallelism: 200 - -data_exhaust_batch_limit_per_request: 20 - -# Start Of UCI Related Variables -uci_postgres_host: "dev-pg11.postgres.database.azure.com" -uci_encryption_key_base64: "" -uci_bot_postgres_database: uci-botdb -uci_fusionauth_postgres_database: uci-fusionauth -uci_postgres_user: "{{postgres.db_username}}" -uci_postgres_password: "{{postgres.db_password}}" - -uci_postgres: - conversation_db_name: "{{ uci_bot_postgres_database }}" - conversation_db_host: "{{ uci_postgres_host }}" - conversation_db_port: "5432" - conversation_db_user: "{{ uci_postgres_user }}" - conversation_db_psss: "{{ uci_postgres_password }}" - conversation_table_name: "bot" - fushionauth_db_name: "{{ uci_fusionauth_postgres_database }}" - fushionauth_db_host: "{{ uci_postgres_host }}" - fushionauth_db_port: "5432" - fushionauth_db_user: "{{ uci_postgres_user }}" - fushionauth_db_psss: "{{ uci_postgres_password }}" - user_table_name: "users" - user_registration_table_name: "user_registrations" - user_identities_table_name: "identities" - -uci_encryption_secret_key: "{{uci_encryption_key_base64}}" -uci_pdata_id: 
"{{uci_env}}.uci.{{sunbird_instance}}" - -# End Of UCI Related Variables - -# Exhaust sanity check vars -cassandra_migrator_job_name: "Cassandra Migrator" - -assessment_metric_primary_category: "{{ exhaust_job_assessment_primary_category }}" - -# Default s3 variables -sunbird_private_s3_storage_key: "" -sunbird_private_s3_storage_secret: "" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json b/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json deleted file mode 100644 index 69e13196e2..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/files/collection-summary-ingestion-spec.json +++ /dev/null @@ -1,251 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "collection-summary-snapshot", - "parser": { - "type": "string", - "parseSpec": { - "format": "json", - "flattenSpec": { - "useFieldDiscovery": false, - "fields": [ - { - "type": "root", - "name": "content_org", - "expr": "contentorg" - }, - { - "type": "root", - "name": "user_org", - "expr": "orgname" - }, - { - "type": "root", - "name": "batch_start_date", - "expr": "startdate" - }, - { - "type": "root", - "name": "batch_end_date", - "expr": "enddate" - }, - { - "type": "root", - "name": "has_certificate", - "expr": "hascertified" - }, - { - "type": "root", - "name": "collection_id", - "expr": "courseid" - }, - { - "type": "root", - "name": "batch_id", - "expr": "batchid" - }, - { - "type": "root", - "name": "collection_name", - "expr": "collectionname" - }, - { - "type": "root", - "name": "batch_name", - "expr": "batchname" - }, - { - "type": "root", - "name": "total_enrolment", - "expr": "enrolleduserscount" - }, - { - "type": "root", - "name": "total_completion", - "expr": "completionuserscount" - }, - { - "type": "root", - "name": "total_certificates_issued", - "expr": "certificateissuedcount" - }, - { - "type": "root", - "name": "content_status", - "expr": "contentstatus" - }, - { - "type": "root", - "name": "user_state", - "expr": "state" - }, - { - "type": "root", - "name": "user_district", - "expr": "district" - }, - { - "type": "root", - "name": "content_channel", - "expr": "channel" - }, - { - "type": "root", - "name": "keywords", - "expr": "keywords" - }, - { - "type": "root", - "name": "timestamp", - "expr": "timestamp" - }, - { - "type": "root", - "name": "medium", - "expr": "medium" - }, - { - "type": "root", - "name": "subject", - "expr": "subject" - }, - { - "type": "root", - "name": "created_for", - "expr": "createdfor" - }, - { - "type": "root", - "name": "user_type", - "expr": "usertype" - }, - { - "type": "root", - "name": "user_subtype", - "expr": "usersubtype" - } - ] - }, - "dimensionsSpec": { - "dimensions": [ - { - "name": "content_org" - }, - { - "name": "user_org" - }, - { - "type": "string", - "name": "batch_id" - }, - { - "type": "string", - "name": "batch_start_date" - }, - { - "type": "string", - "name": "batch_end_date" - }, - { - "type": "string", - "name": "collection_id" - }, - { - "type": "string", - "name": "collection_name" - }, - { - "type": "string", - "name": "batch_name" - }, - { - "type": "long", - "name": "total_enrolment" - }, - { - "type": "long", - "name": "total_completion" - }, - { - "type": "long", - "name": "total_certificates_issued" - }, - { - "type": "string", - "name": "content_status" - }, - { - "type": "string", - "name": "user_state" - }, - { - "type": "string", - "name": "user_district" - }, - { - "name": "keywords" - }, - { - "name": 
"has_certificate" - }, - { - "type": "string", - "name": "content_channel" - }, - { - "name": "medium" - }, - { - "name": "subject" - }, - { - "name": "created_for" - }, - { - "type": "string", - "name": "user_type" - }, - { - "type": "string", - "name": "user_subtype" - } - ], - "dimensionsExclusions": [] - }, - "timestampSpec": { - "column": "timestamp", - "format": "auto" - } - } - }, - "metricsSpec": [], - "granularitySpec": { - "type": "uniform", - "segmentGranularity": "day", - "queryGranularity": "none", - "rollup": true - } - }, - "ioConfig": { - "type": "index", - "firehose": { - "type": "static-azure-blobstore", - "blobs": [ - { - "container": "reports", - "path": "/collection-summary-reports-v2/collection-summary-report-latest.json" - } - ], - "fetchTimeout": 300000 - } - }, - "tuningConfig": { - "type": "index", - "targetPartitionSize": 5000000, - "maxRowsInMemory": 25000, - "forceExtendableShardSpecs": false, - "logParseExceptions": true - } - } -} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json b/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json deleted file mode 100644 index 69e773d457..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/files/sourcing-ingestion-spec.json +++ /dev/null @@ -1,146 +0,0 @@ -{ - "type": "index", - "spec": { - "dataSchema": { - "dataSource": "sourcing-summary-snapshot", - "parser": { - "type": "string", - "parseSpec": { - "format": "json", - "flattenSpec": { - "useFieldDiscovery": false, - "fields": [ - { - "type": "root", - "name": "program_id", - "expr": "program_id" - }, - { - "type": "root", - "name": "status", - "expr": "status" - }, - { - "type": "root", - "name": "rootorg_id", - "expr": "rootorg_id" - }, - { - "type": "root", - "name": "user_id", - "expr": "user_id" - }, - { - "type": "root", - "name": "osid", - "expr": "osid" - }, - { - "type": "root", - "name": "user_type", - "expr": "user_type" - }, - { - "type": "root", - "name": "contributor_id", - "expr": "contributor_id" - }, - { - "type": "root", - "name": "total_contributed_content", - "expr": "total_contributed_content" - }, - { - "type": "root", - "name": "primary_category", - "expr": "primary_category" - }, - { - "type": "root", - "name": "created_by", - "expr": "created_by" - } - ] - }, - "dimensionsSpec": { - "dimensions": [ - { - "type": "string", - "name": "program_id" - }, - { - "type": "string", - "name": "status" - }, - { - "type": "string", - "name": "rootorg_id" - }, - { - "type": "string", - "name": "user_id" - }, - { - "type": "string", - "name": "osid" - }, - { - "type": "string", - "name": "user_type" - }, - { - "type": "string", - "name": "contributor_id" - }, - { - "type": "string", - "name": "primary_category" - }, - { - "type": "string", - "name": "created_by" - } - ], - "dimensionsExclusions": [] - }, - "timestampSpec": { - "column": "timestamp", - "format": "auto" - } - } - }, - "metricsSpec": [ - { - "name": "total_count", - "type": "count" - } - ], - "granularitySpec": { - "type": "uniform", - "segmentGranularity": "day", - "queryGranularity": "none", - "rollup": true - } - }, - "ioConfig": { - "type": "index", - "firehose": { - "type": "static-azure-blobstore", - "blobs": [ - { - "container": "reports", - "path": "/sourcing/SourcingSummaryReport.json" - } - ], - "fetchTimeout": 300000 - } - }, - "tuningConfig": { - "type": "index", - "targetPartitionSize": 5000000, - "maxRowsInMemory": 25000, - "forceExtendableShardSpecs": false, - 
"logParseExceptions": true - } - } -} diff --git a/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml b/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml deleted file mode 100644 index 733c416138..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/tasks/main.yml +++ /dev/null @@ -1,499 +0,0 @@ -## Data products deployment ## -- name: Ensure azure blob storage container exists - command: az storage container create --name {{ bucket }} - when: dp_object_store_type == "azure" - tags: - - always - -- name: Ensure oci oss bucket exists - command: "oci os bucket get --name {{ bucket }}" - register: check_bucket - when: dp_object_store_type == "oci" - tags: - - always - -- name: Create oci oss bucket - command: "oci os bucket create -c {{oci_bucket_compartment}} --name {{bucket}}" - when: dp_object_store_type == "oci" and check_bucket.rc !=0 - tags: - - always - -- name: Copy Core Data Products - copy: src={{ analytics_batch_module_artifact }} dest={{ analytics.home }}/models-{{ model_version }} - tags: - - dataproducts - -- name: Copy Core Data Products to azure blob - command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - dataproducts-spark-cluster - -- name: Copy Core Data Products to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_batch_module_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }} --force - async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - dataproducts-spark-cluster - -- name: Unarchive Ed Data Products - become: yes - unarchive: src={{ playbook_dir}}/{{ analytics_ed_dataporducts_artifact }} dest={{ analytics.home }}/models-{{ model_version }} copy=yes group={{ analytics_group }} owner={{ analytics_user }} - tags: - - ed-dataproducts - -- name: Copy Ed Data Products to azure blob - command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar -f {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - ed-dataproducts-spark-cluster - -- name: Copy Ed Data Products to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/data-products-1.0.jar --file {{ analytics.home }}/models-{{ model_version }}/data-products-1.0/data-products-1.0.jar --force - async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - ed-dataproducts-spark-cluster - -- name: Copy Framework Library - copy: src={{ analytics_core_artifact }} dest={{ analytics.home }}/models-{{ model_version }} - tags: - - framework - -- name: Copy Framework Library to azure blob - command: az storage blob upload --overwrite --debug -c {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - framework-spark-cluster - -- name: Copy Framework Library to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ analytics_core_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ analytics_core_artifact }} --force - 
async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - framework-spark-cluster - -- name: Copy Scruid Library - copy: src={{ scruid_artifact }} dest={{ analytics.home }}/models-{{ model_version }} - tags: - - framework - -- name: Copy Scruid Library to azure blob - command: az storage blob upload --overwrite -c {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} -f {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - framework-spark-cluster - -- name: Copy Scruid Library to oci oss - command: oci os object put -bn {{ bucket }} --name models-{{ model_version }}/{{ scruid_artifact }} --file {{ analytics.home }}/models-{{ model_version }}/{{ scruid_artifact }} --force - async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - framework-spark-cluster - -- name: Copy Job Manager - copy: src={{ analytics_job_manager_artifact }} dest={{ analytics.home }}/models-{{ model_version }} - tags: - - dataproducts - -- name: Copy configuration file - template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/{{ env }}.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - ed-dataproducts - - framework - when: dockdataproducts is undefined - -- name: Copy configuration file - template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/dock-{{ env }}.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - ed-dataproducts - - framework - when: dockdataproducts is defined - -- name: Copy configuration file as application.conf for cluster - template: src=common.conf.j2 dest={{ analytics.home }}/models-{{ model_version }}/application.conf mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - framework-spark-cluster - -- name: Update spark temp dir value for cluster - lineinfile: - path: '{{ analytics.home }}/models-{{ model_version }}/application.conf' - regexp: '^spark_output_temp_dir="/mount/data/analytics/tmp/"' - line: 'spark_output_temp_dir="/var/log/sparkapp/tmp/"' - tags: - - framework-spark-cluster - -- name: Update logger kafka config for cluster - lineinfile: - path: '{{ analytics.home }}/models-{{ model_version }}/application.conf' - regexp: '^log.appender.kafka.enable="false"' - line: 'log.appender.kafka.enable="true"' - tags: - - framework-spark-cluster - -- name: Copy configuration file to azure blob - command: az storage blob upload --overwrite -c {{ bucket }} -f {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf - async: 3600 - poll: 10 - when: dp_object_store_type == "azure" - tags: - - framework-spark-cluster - -- name: Copy configuration file to oci oss - command: oci os object put -bn {{ bucket }} --file {{ analytics.home }}/models-{{ model_version }}/application.conf --name models-{{ model_version }}/application.conf --force - async: 3600 - poll: 10 - when: dp_object_store_type == "oci" - tags: - - framework-spark-cluster - -- name: Copy log4j2 xml file - template: src=log4j2.xml.j2 dest={{ analytics.home }}/models-{{ model_version }}/log4j2.xml mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: [ dataproducts, framework, ed-dataproducts ] - -- name: Copy Scripts - template: src={{ item }}.j2 dest={{ analytics.home }}/scripts/{{ item }}.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - 
with_items: "{{ analytics.scripts }}" - tags: [ dataproducts, framework, ed-dataproducts ] - when: dockdataproducts is undefined - -- name: Copy python sanity check script file - template: src=exhaust_sanity_check.py.j2 dest={{ analytics.home }}/scripts/exhaust_sanity_check.py - tags: [ dataproducts, framework, ed-dataproducts ] - when: dockdataproducts is undefined - -- name: Copy Dock Scripts - template: src={{ item }}.j2 dest={{ analytics.home }}/scripts/{{ item }}.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - with_items: "{{ analytics.dockScripts }}" - tags: [ dataproducts, framework, ed-dataproducts ] - when: dockdataproducts is defined - -- name: Update model config - template: src=model-config.j2 dest={{ analytics.home }}/scripts/model-config.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - update-config - - ed-dataproducts - when: dockdataproducts is undefined - -- name: Update model dock config - template: src=model-dock-config.j2 dest={{ analytics.home }}/scripts/model-dock-config.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - update-config - - ed-dataproducts - when: dockdataproducts is defined - -- name: Copy submit-all-jobs ruby file - template: src=submit-all-jobs.rb.j2 dest={{ analytics.home }}/scripts/submit-all-jobs.rb mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - update-config - - ed-dataproducts - -- name: Copy model-config.json file - template: src=model-config.json.j2 dest={{ analytics.home }}/scripts/model-config.json mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - dataproducts - - update-config - - ed-dataproducts - -- name: Clean cron jobs - command: crontab -r - ignore_errors: yes - tags: - - default-jobs - - spark-jobs - - spark1-jobs - - clean-cronjobs - -- name: Create daily cron jobs for wfs - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh wfs" - with_dict: "{{ run_wfs_job }}" - tags: - - spark1-jobs - -- name: Create daily cron jobs for monitor job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh monitor-job-summ" - with_dict: "{{ run_monitor_job }}" - tags: - - spark1-jobs - -- name: Create daily cron jobs using submit-all-jobs - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job='/bin/bash -lc "ruby {{ analytics.home }}/scripts/submit-all-jobs.rb"' - with_dict: "{{ submit_jobs }}" - tags: - - default-jobs - - spark-jobs - - cronjobs - -- name: Create start-jobmanager cron jobs - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/start-jobmanager.sh" - with_dict: "{{ start_jobmanager }}" - tags: - - default-jobs - - spark-jobs - - cronjobs - -- name: Create course-batch-status-updater cron job - cron: name="{{env}}-course-batch-status-updater" minute=*/{{ course_batch_status_updater_job_schedule }} job="{{ analytics.home }}/scripts/run-job.sh course-batch-status-updater" - tags: - - cronjobs - - default-jobs - - spark1-jobs - -- name: Create admin-user-reports cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh admin-user-reports" - with_dict: "{{ 
run_admin_user_reports_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs -- name: Create admin-geo-reports cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh admin-geo-reports" - with_dict: "{{ run_admin_geo_reports_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create assessment-aggregator reports cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="/bin/bash {{ analytics.home }}/adhoc-scripts/run_exporter.sh > /home/analytics/output.log" - with_dict: "{{ run_assessment_aggregator_report_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create etb metrics cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} weekday={{ item.value.weekday }} job="{{ analytics.home }}/scripts/run-job.sh etb-metrics" - with_dict: "{{ run_etb_metrics_weekly_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create progress-exhaust cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh progress-exhaust" - with_dict: "{{ run_progress_exhaust }}" - tags: - - cronjobs - - default-jobs - - spark1-jobs - -- name: Create response-exhaust cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh response-exhaust" - with_dict: "{{ run_response_exhaust }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create cassandra-migration cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh cassandra-migration" - with_dict: "{{ run_cassandra_migration }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - - -- name: Create userinfo-exhaust cron job - cron: name="{{ env }}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh userinfo-exhaust" - with_dict: "{{ run_userinfo_exhaust }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create collection-summary cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh collection-summary-report" - with_dict: "{{ run_collection_summary }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Copy collection-summary ingestion spec - copy: src="collection-summary-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - ed-dataproducts - -- name: Create sourcing-summary cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-dock-job.sh sourcing-summary-report" - with_dict: "{{ run_sourcing_summary }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create uci-private-exhaust cron job - cron: name="{{env}}-{{ item.key }}" minute={{ item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh uci-private-exhaust" - with_dict: "{{ run_uci_private_exhaust_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Create uci-response-exhaust cron job - cron: name="{{env}}-{{ item.key }}" minute={{ 
item.value.minute }} hour={{ item.value.hour }} job="{{ analytics.home }}/scripts/run-job.sh uci-response-exhaust" - with_dict: "{{ run_uci_response_exhaust_job }}" - tags: - - cronjobs - - default-jobs - - spark-jobs - -- name: Copy sourcing-summary ingestion spec - copy: src="sourcing-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - ed-dataproducts - -- name: Update start jobmanager - template: src=start-jobmanager.j2 dest={{ analytics.home }}/scripts/start-jobmanager.sh mode=755 owner={{ analytics_user }} group={{ analytics_group }} - tags: - - update-jobmanager-config - - dataproducts - -# Cluster job submit tasks -- name: Copy cluster-config.json file - template: src=cluster-config.json.j2 dest={{ analytics_cluster.home }}/cluster-config.json - delegate_to: localhost - tags: - - replay-job - - run-job - - config-update - -- name: Copy submit-script.sh file - template: src=submit-script.j2 dest={{ analytics_cluster.home }}/submit-script.sh mode=755 - delegate_to: localhost - tags: - - replay-job - - run-job - - config-update - -- name: Copy model-config.sh file - template: src=model-config.j2 dest={{ analytics_cluster.home }}/model-config.sh - delegate_to: localhost - tags: - - replay-job - - run-job - - config-update - -- name: Replay Job - shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --startDate {{ start_date }} --endDate {{ end_date }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} &" - async: "{{ (pause_min * 60) }}" - poll: 0 - tags: - - replay-job - -- name: Run Job - shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} --batch_id {{ batch_id }} &" - async: "{{ (pause_min * 60) }}" - poll: 0 - tags: - - run-job - -- name: Submit jobs - shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ item }} --mode default --sparkMaster yarn &" - with_items: "{{ jobs.split(',')|list }}" - tags: - - job-submit - -# Cluster exhaust parallel jobs submit tasks - -- name: Install required python packages - pip: - name: - - psycopg2-binary - - pandas - - IPython - tags: - - parallel-jobs-submit - -- name: Copy python script file - template: src=update-job-requests.py.j2 dest={{ analytics_cluster.home }}/update-job-requests.py - delegate_to: localhost - tags: - - parallel-jobs-submit - -- name: Execute python script to populate batch numbers - shell: | - if echo "{{jobs}}" | grep 'druid' - then - python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{env}}_report_config - elif echo "{{jobs}}" | grep 'exhaust' - then - python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} exhaust {{env}}_job_request - fi - tags: - - parallel-jobs-submit - register: jobsCountStr - - -- debug: - var: jobsCountStr - tags: - - parallel-jobs-submit - -- name: Get stdout with parallelisation value from python script to tmp file - shell: echo "{{ jobsCountStr.stdout }}" > /tmp/test.txt - tags: - - parallel-jobs-submit - -- name: Extract parallelisation value from tmp file - shell: "cat /tmp/test.txt | tr '\n' ' ' | awk -F': ' '{print $NF}'" - register: jobsCountOut - tags: - - parallel-jobs-submit - 
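A note on the two tasks above: the parallelisation value is recovered by flattening the script's stdout into one line and keeping whatever follows the last ': ' separator. A minimal Python sketch of the same extraction, for illustration only (the exact wording of update-job-requests.py's final message is not shown in this patch; only the trailing ': <count>' format is assumed):

    # Mirrors: cat /tmp/test.txt | tr '\n' ' ' | awk -F': ' '{print $NF}'
    # Assumption: the script's last "key: value" field carries the jobs count.
    def extract_parallelisation(stdout: str) -> str:
        flattened = stdout.replace("\n", " ")           # tr '\n' ' '
        return flattened.rsplit(": ", 1)[-1].strip()    # awk -F': ' '{print $NF}'

    print(extract_parallelisation("updated requests\nparallelisation: 4"))  # prints "4"

-- debug: - 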
var: jobsCountOut - tags: - - parallel-jobs-submit - -# set jobs count variable from python script output -- set_fact: - jobs_count: "{{ jobsCountOut.stdout }}" - tags: - - parallel-jobs-submit - -- name: Submit parallel exhaust jobs - shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ jobs }} --mode parallel-jobs --parallelisation {{ jobs_count }} &" - poll: 30 - tags: - - parallel-jobs-submit - register: submitOutput - -- debug: - var: submitOutput - tags: - - parallel-jobs-submit - -# Execute Exhaust job sanity check script tasks - -- name: Install required python packages - pip: - name: - - requests - tags: - - run-sanity - -- name: Run sanity check python script - shell: python {{ analytics.home }}/scripts/exhaust_sanity_check.py - tags: - - run-sanity - register: SanityCheckStatus \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 deleted file mode 100644 index 12ebf0bde0..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/cluster-config.json.j2 +++ /dev/null @@ -1,86 +0,0 @@ - -{% if dp_object_store_type == "azure" %} -{ - "jars": [ - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - ], - "file": "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - "files": [ - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/application.conf" - ], - "className": "org.ekstep.analytics.job.JobExecutor", - "executorCores": {{ spark_cluster.executor_core }}, - "executorMemory": "{{ spark_cluster.executor_memory }}", - "numExecutors": {{ spark_cluster.num_executors }}, - "conf": { - "spark.sql.autoBroadcastJoinThreshold" : "-1", - "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", - "spark.scheduler.mode" : "FAIR", - "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", - "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", - "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} 
-Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" - } -} -{% elif (dp_object_store_type == "s3") %} -{ - "jars": [ - "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", - "s3n://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", - "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - ], - "file": "s3n://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - "files": [ - "s3n://{{ bucket }}/models-{{ model_version }}/application.conf" - ], - "className": "org.ekstep.analytics.job.JobExecutor", - "executorCores": {{ spark_cluster.executor_core }}, - "executorMemory": "{{ spark_cluster.executor_memory }}", - "numExecutors": {{ spark_cluster.num_executors }}, - "conf": { - "spark.sql.autoBroadcastJoinThreshold" : "-1", - "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", - "spark.scheduler.mode" : "FAIR", - "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", - "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", - "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml 
-DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" - } -} -{% elif (dp_object_store_type == "oci") %} -{ - "jars": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - ], - "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - "files": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" - ], - "className": "org.ekstep.analytics.job.JobExecutor", - "executorCores": {{ spark_cluster.executor_core }}, - "executorMemory": "{{ spark_cluster.executor_memory }}", - "numExecutors": {{ spark_cluster.num_executors }}, - "conf": { - "spark.sql.autoBroadcastJoinThreshold" : "-1", - "spark.dynamicAllocation.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.shuffle.service.enabled" :"{{ spark_enable_dynamic_allocation }}", - "spark.sql.shuffle.partitions" : "{{ spark_sql_shuffle_partitions }}", - "spark.scheduler.mode" : "FAIR", - "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", - "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", - "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log 
-Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" - } -} -{% endif %} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 deleted file mode 100644 index e0ec7005df..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/common.conf.j2 +++ /dev/null @@ -1,317 +0,0 @@ -application.env="{{ env }}" -telemetry.version="2.1" -default.parallelization="10" -spark_output_temp_dir="/mount/data/analytics/tmp/" -lp.url="{{lp_url}}" -service.search.url="{{ service.search.url }}" -service.search.path="{{ service.search.path }}" -spark.cassandra.connection.host="{{groups['dp-cassandra'][0]}}" -cassandra.keyspace_prefix="{{ cassandra_keyspace_prefix }}" -cassandra.hierarchy_store_prefix="{{ cassandra_hierarchy_store_prefix }}" - - -storage.key.config="{{ dp_storage_key_config }}" -storage.secret.config="{{ dp_storage_secret_config }}" -reports.storage.key.config="{{ dp_reports_storage_key_config }}" -reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" -{% if dp_object_store_type == "azure" %} -cloud_storage_type="azure" -{% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3" or dp_object_store_type == "oci") %} -{% if cloud_service_provider == "oci" %} -cloud_storage_type="oci" -{% else %} -cloud_storage_type="s3" -{% endif %} -cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" -cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}" -storage.endpoint.config="{{ s3_storage_endpoint_with_protocol }}" -aws_storage_key="{{ s3_storage_key }}" -aws_storage_secret="{{ s3_storage_secret }}" -{% endif %} - -lp.contentmodel.versionkey="jd5ECm/o0BXwQCe8PfZY1NoUkB9HN41QjA80p22MKyRIcP5RW4qHw8sZztCzv87M" - -# Joblog Kafka appender config for cluster execution -log.appender.kafka.enable="false" -log.appender.kafka.broker_host="{{groups['processing-cluster-kafka'][0]}}:9092" -log.appender.kafka.topic="{{ env }}.druid.events.log" - -# Kafka connection configuration -kafka.consumer.brokerlist="{{groups['processing-cluster-kafka'][0]}}:9092" -kafka.consumer.topic="{{ env }}.analytics.job_queue" -no_of_jobs=42 - -# Spark Driver -spark.driver.memory=6g - -spark.memory_fraction={{ spark.memory_fraction }} -spark.storage_fraction={{ spark.storage_fraction }} -spark.driver_memory="{{ spark.driver_memory }}" - -#Monitor Jobs - -monitor { - notification { - webhook_url = "{{ data_exhaust_webhook_url }}" - channel = "{{ data_exhaust_Channel }}" - token = "{{ data_exhaust_token }}" - slack = true - name = "{{ data_exhaust_name }}" - } -} - -#App ID & Channel ID -default.consumption.app.id="no_value" -default.channel.id="in.ekstep" -default.creation.app.id="no_value" - - -# Media Service Type -media_service_type = "azure" - -azure_tenant="{{ media_service_azure_tenant }}" -azure_subscription_id="{{ media_service_azure_subscription_id }}" 
-azure_account_name="{{ media_service_azure_account_name }}" -azure_resource_group_name="{{ media_service_azure_resource_group_name }}" -azure_token_client_key="{{ media_service_azure_token_client_key }}" -azure_token_client_secret="{{ media_service_azure_token_client_secret }}" -elasticsearch.service.endpoint="http://{{groups['composite-search-cluster'][0]}}:9200" -elasticsearch.index.compositesearch.name="{{ es_search_index }}" - -org.search.api.url="{{ channelSearchServiceEndpoint }}" -org.search.api.key="{{ searchServiceAuthorizationToken }}" - -hierarchy.search.api.url="{{ hierarchySearchServiceUrl }}" -hierarchy.search.api.path="{{ hierarchySearchServicEndpoint }}" - -# Azure Media Service Config -azure { - location = "centralindia" - tenant = "tenant name" - subscription_id = "subscription id" - - api { - endpoint="Media Service API End Point" - version = "2018-07-01" - } - - account_name = "account name" - resource_group_name = "Resource Group Name" - - transform { - default = "media_transform_default" - hls = "media_transform_hls" - } - - stream { - base_url = "{{ stream_base_url }}" - endpoint_name = "default" - protocol = "Hls" - policy_name = "Predefined_ClearStreamingOnly" - } - - token { - client_key = "client key" - client_secret = "client secret" - } -} - -## Reports - Global config -cloud.container.reports="reports" - -# course metrics container in azure -course.metrics.cassandra.sunbirdKeyspace="sunbird" -course.metrics.cassandra.sunbirdCoursesKeyspace="sunbird_courses" -course.metrics.cassandra.sunbirdHierarchyStore="{{ cassandra_hierarchy_store_keyspace }}" -course.metrics.cloud.objectKey="" -course.metrics.cassandra.input.consistency="QUORUM" -es.host="http://{{groups['core-es'][0]}}" -es.port="9200" -es.composite.host="{{groups['composite-search-cluster'][0]}}" - -# State admin user reports -# Uses azure only - course.metrics.cloud.provider -admin.metrics.cloud.objectKey="" -admin.metrics.temp.dir="/mount/data/analytics/admin-user-reports" - -#Assessment report config -es.scroll.size = 1000 - -# Best Score or Latest Updated Score -assessment.metrics.bestscore.report=true -assessment.metrics.supported.contenttype="SelfAssess" -assessment.metrics.supported.primaryCategories="{{ assessment_metric_primary_category }}" -spark.sql.caseSensitive=true - -# content rating configurations - -druid.sql.host="http://{{druid_broker_host}}:8082/druid/v2/sql/" -druid.unique.content.query="{\"query\":\"SELECT DISTINCT \\\"object_id\\\" AS \\\"Id\\\"\\nFROM \\\"druid\\\".\\\"summary-events\\\" WHERE \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\"}" -druid.content.rating.query="{\"query\":\"SELECT \\\"object_id\\\" AS contentId, COUNT(*) AS \\\"totalRatingsCount\\\", SUM(edata_rating) AS \\\"Total Ratings\\\", SUM(edata_rating)/COUNT(*) AS \\\"averageRating\\\" FROM \\\"druid\\\".\\\"telemetry-feedback-events\\\" WHERE \\\"eid\\\" = 'FEEDBACK' AND \\\"edata_rating\\\">0 GROUP BY \\\"object_id\\\"\"}" -druid.content.consumption.query="{\"query\":\"SELECT COUNT(*) as \\\"play_sessions_count\\\", object_id as \\\"contentId\\\", SUM(total_time_spent) as \\\"total_time_spent\\\", dimensions_pdata_id, object_id\\nFROM \\\"summary-events\\\"\\nWHERE \\\"dimensions_mode\\\" = 'play' AND \\\"dimensions_type\\\" ='content' AND \\\"dimensions_pdata_pid\\\" != 'creation-portal' \\nGROUP BY object_id, dimensions_pdata_id\"}" -lp.system.update.base.url="{{lp_url}}/system/v3/content/update" - - -#Experiment Configuration - 
-user.search.api.url="{{sunbird_learner_service_url}}/private/user/v1/search" -user.search.limit="10000" - -# pipeline auditing -druid.pipeline_metrics.audit.query="{\"query\":\"SELECT \\\"job-name\\\", SUM(\\\"success-message-count\\\") AS \\\"success-message-count\\\", SUM(\\\"failed-message-count\\\") AS \\\"failed-message-count\\\", SUM(\\\"duplicate-event-count\\\") AS \\\"duplicate-event-count\\\", SUM(\\\"batch-success-count\\\") AS \\\"batch-success-count\\\", SUM(\\\"batch-error-count\\\") AS \\\"batch-error-count\\\", SUM(\\\"primary-route-success-count\\\") AS \\\"primary-route-success-count\\\", SUM(\\\"secondary-route-success-count\\\") AS \\\"secondary-route-success-count\\\" FROM \\\"druid\\\".\\\"pipeline-metrics\\\" WHERE \\\"job-name\\\" IN (%s) AND \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s' GROUP BY \\\"job-name\\\" \"}" -druid.telemetryDatasource.count.query="{ \"query\": \"SELECT COUNT(*) AS \\\"total\\\" FROM \\\"druid\\\".\\\"telemetry-events\\\" WHERE TIME_FORMAT(MILLIS_TO_TIMESTAMP(\\\"syncts\\\"), 'yyyy-MM-dd HH:mm:ss.SSS', 'Asia/Kolkata') BETWEEN TIMESTAMP '%s' AND '%s' AND \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\" }" -druid.summaryDatasource.count.query="{\"query\": \"SELECT COUNT(*) AS \\\"total\\\" FROM \\\"druid\\\".\\\"summary-events\\\" WHERE \\\"__time\\\" BETWEEN TIMESTAMP '%s' AND TIMESTAMP '%s'\" }" - -#Pipeline Audit Jobs - -pipeline_audit { - notification { - webhook_url = "{{ data_exhaust_webhook_url }}" - channel = "{{ data_exhaust_Channel }}" - token = "{{ data_exhaust_token }}" - slack = true - name = "Pipeline Audit" - } -} - -#Druid Query Processor - -druid = { - hosts = "{{druid_broker_host}}:8082" - secure = false - url = "/druid/v2/" - datasource = "telemetry-events" - response-parsing-timeout = 300000 - client-backend = "com.ing.wbaa.druid.client.DruidAdvancedHttpClient" - client-config = { - druid-advanced-http-client ={ - queue-size = 32768 - queue-overflow-strategy = "Backpressure" - query-retries = 5 - query-retry-delay = 10 ms - host-connection-pool = { - max-connections = 32 - min-connections = 0 - max-open-requests = 128 - max-connection-lifetime = 20 min - idle-timeout = 15 min - client = { - # The time after which an idle connection will be automatically closed. - # Set to `infinite` to completely disable idle timeouts. 
- idle-timeout = 10 min - parsing.max-chunk-size = 10m - } - } - } - - } -} -druid.rollup.host="{{druid_rollup_broker_host}}" -druid.rollup.port=8082 -druid.query.wait.time.mins=10 -druid.report.upload.wait.time.mins=10 -druid.scan.batch.size=100 -druid.scan.batch.bytes=2000000 -druid.query.batch.buffer=500000 - - -// Metric event config -metric.producer.id="pipeline.monitoring" -metric.producer.pid="dataproduct.metrics" -push.metrics.kafka=true -metric.kafka.broker="{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" -metric.kafka.topic="{{ env }}.prom.monitoring.metrics" - -//Postgres Config -postgres.db="{{postgres.db_name}}" -postgres.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" -postgres.user="{{postgres.db_username}}" -postgres.pass="{{postgres.db_password}}" -postgres.program.table="program" -postgres.nomination.table="nomination" -postgres.usertable="\"V_User\"" -postgres.org.table="\"V_User_Org\"" - -druid.ingestion.path="/druid/indexer/v1/task" -druid.segment.path="/druid/coordinator/v1/metadata/datasources/" -druid.deletesegment.path="/druid/coordinator/v1/datasources/" - -postgres.druid.db="{{ druid_report_postgres_db_name }}" -postgres.druid.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" -postgres.druid.user="{{ druid_report_postgres_db_username }}" -postgres.druid.pass="{{ dp_vault_druid_postgress_pass }}" - - -location.search.url="https://{{location_search_url}}/v1/location/search" -location.search.token="{{ location_search_token }}" -location.search.request="{\"request\": {\"filters\": {\"type\" :[\"state\",\"district\"]},\"limit\" : 10000}}" - -druid.state.lookup.url = "http://{{groups['raw-coordinator'][0]}}:8081/druid/coordinator/v1/lookups/config/__default/stateSlugLookup" - -sunbird_encryption_key="{{ core_vault_sunbird_encryption_key }}" - -dcedialcode.filename="DCE_dialcode_data.csv" -etbdialcode.filename="ETB_dialcode_data.csv" -dcetextbook.filename="DCE_textbook_data.csv" -etbtextbook.filename="ETB_textbook_data.csv" -etb.dialcode.druid.length={{ etb_dialcode_list_druid_length }} - -{% if dp_object_store_type == "azure" %} -druid.report.default.storage="azure" -{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} -druid.report.default.storage="s3" -{% endif %} - -druid.report.date.format="yyyy-MM-dd" -druid.report.default.container="report-verification" - -## Collection Exhaust Jobs Configuration -- Start ## - -sunbird.user.keyspace="{{ user_table_keyspace }}" -sunbird.courses.keyspace="{{ course_keyspace }}" -sunbird.content.hierarchy.keyspace="{{ cassandra_hierarchy_store_keyspace }}" -sunbird.user.cluster.host="{{ core_cassandra_host }}" -sunbird.courses.cluster.host="{{ core_cassandra_host }}" -sunbird.content.cluster.host="{{ core_cassandra_host }}" -sunbird.report.cluster.host="{{ report_cassandra_cluster_host }}" -sunbird.user.report.keyspace="{{ report_user_table_keyspace }}" -collection.exhaust.store.prefix="" -postgres.table.job_request="{{ job_request_table }}" -postgres.table.dataset_metadata="{{ dataset_metadata_table }}" - -## Collection Exhaust Jobs Configuration -- End ## - -## Exhaust throttling variables -exhaust.batches.limit.per.channel={{ exhaust_batches_limit_per_channel }} -exhaust.file.size.limit.per.channel={{ exhaust_file_size_limit_bytes_per_channel }} - -exhaust.parallel.batch.load.limit={{ exhaust_parallel_batch_load_limit }} -exhaust.user.parallelism={{ exhaust_user_parallelism }} - -data_exhaust.batch.limit.per.request={{ data_exhaust_batch_limit_per_request }} - - 
- -//START of UCI Postgres Config - -uci.conversation.postgres.db="{{ uci_postgres.conversation_db_name }}" -uci.conversation.postgres.url="jdbc:postgresql://{{uci_postgres.conversation_db_host}}:{{uci_postgres.conversation_db_port}}/" - -uci.fushionauth.postgres.db="{{ uci_postgres.fushionauth_db_name }}" -uci.fushionauth.postgres.url="jdbc:postgresql://{{uci_postgres.fushionauth_db_host}}:{{uci_postgres.fushionauth_db_port}}/" - -uci.postgres.table.conversation="{{ uci_postgres.conversation_table_name }}" -uci.postgres.table.user="{{ uci_postgres.user_table_name }}" -uci.postgres.table.user_registration="{{ uci_postgres.user_registration_table_name }}" -uci.postgres.table.identities="{{ uci_postgres.user_identities_table_name }}" - -uci.conversation.postgres.user="{{ uci_postgres.conversation_db_user }}" -uci.conversation.postgres.pass="{{ uci_postgres.conversation_db_psss }}" - -uci.fushionauth.postgres.user="{{ uci_postgres.fushionauth_db_user }}" -uci.fushionauth.postgres.pass="{{ uci_postgres.fushionauth_db_psss }}" - -uci.exhaust.store.prefix="" -uci.encryption.secret="{{ uci_encryption_secret_key }}" - -// END OF UCI Related Job Configs \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 deleted file mode 100644 index 3f6ba98d9d..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/exhaust_sanity_check.py.j2 +++ /dev/null @@ -1,58 +0,0 @@ -import requests -from requests.auth import HTTPBasicAuth -import json -from kafka import KafkaConsumer -from json import loads -import sys - -def checkClusterStatus(): - try: - res = requests.get('https://{{ spark_cluster_name }}.azurehdinsight.net/api/v1/clusters/{{ spark_cluster_name }}/alerts?format=summary', auth = HTTPBasicAuth("{{ admin_name }}" ,"{{ admin_password }}")) - if(res.status_code == 200): - resJson = json.loads(res.text) - warningCount = resJson["alerts_summary"]["WARNING"]["count"] - criticalCount = resJson["alerts_summary"]["CRITICAL"]["count"] - unknownCount = resJson["alerts_summary"]["UNKNOWN"]["count"] - if((warningCount + criticalCount + unknownCount) == 0): - print("Cluster is up & running fine. With these - WARNING:{0}, CRITICAL:{1}, UNKNOWN:{2}".format(warningCount, criticalCount, unknownCount)) - return "SUCCESS" - else: - return "FAILED. Cluster is not running properly. Found these - WARNING:{0}, CRITICAL:{1}, UNKNOWN:{2}".format(warningCount, criticalCount, unknownCount) - else: - return "FAILED. Cluster failed to provide response. 
Resulted in {0} response".format(res.status_code) - except Exception as e: - return "FAILED with {0}".format(str(e)) - -def checkCassandraMigratorStatus(): - try: - ## from joblog file - migratorENDEvent = "" - with open('{{ analytics.home }}/scripts/logs/joblog.log', 'rt') as logs: - for log in logs: - if (log.count("JOB_END") == 1 and log.count("{{ cassandra_migrator_job_name }}") == 1): - migratorENDEvent = log - logJson = json.loads(migratorENDEvent) - jobStatus = logJson["edata"]["status"] - if (jobStatus == "SUCCESS"): - print("Cassandra Migrator Completed successfully!") - return "SUCCESS" - else: - return "Cassandra Migrator failed" - except Exception as e: - return "FAILED with {0}".format(str(e)) - - -def main(): - finalSuccessMessage="All checks are successful" - ## check Cassandra Migrator status - cassandraMigratorState=checkCassandraMigratorStatus() - ## check spark cluster status - clusterState=checkClusterStatus() - - if(cassandraMigratorState == "SUCCESS" and clusterState == "SUCCESS"): - return finalSuccessMessage - else: - raise Exception("Required checks failed. Job Status: {0} and Cluster status: {1}".format(cassandraMigratorState, clusterState)) - -result=main() -print(result) \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 deleted file mode 100644 index c82cdd702c..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/log4j2.xml.j2 +++ /dev/null @@ -1,54 +0,0 @@ -[log4j2.xml.j2 markup lost in extraction: the 54 deleted lines were a Log4j2 XML configuration; recoverable content is the log directory {{ analytics.home }}/scripts/logs and pattern layouts of "%m%n"] diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 deleted file mode 100644 index 86f376b65d..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.j2 +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env bash - -config() { - bucket={{ secor_bucket }} - brokerList={{ brokerlist }} - zookeeper={{ zookeeper }} - brokerIngestionList={{ ingestion_kafka_brokers }} - job_topic={{ analytics_job_queue_topic }} - topic={{ topic }} - analyticsMetricsTopic={{ analytics_metrics_topic }} - sinkTopic={{ sink_topic }} - metricsTopic={{ metrics_topic }} - analytics_home={{ analytics.home }} - temp_folder={{ job_manager_tmp_dir }} - sparkCassandraConnectionHost="{{ lp_cassandra_host }}" - sparkRedisConnectionHost={{ metadata2_redis_host }} - sunbirdPlatformCassandraHost="{{ core_cassandra_host }}" - sunbirdPlatformElasticsearchHost="{{ sunbird_es_host }}" - jobManagerJobsCount="{{ analytics_jobs_count }}" - producerEnv="{{ producer_env }}" - baseScriptPath="{{ spark_output_temp_dir }}" - reportPostContainer="{{ reports_container }}" - druidIngestionURL="{{ druid_rollup_cluster_ingestion_task_url }}/druid/indexer/v1/task" - assessTopic={{ assess_topic }} - - - if [ -z "$2" ]; then endDate=$(date --date yesterday "+%Y-%m-%d"); else endDate=$2; fi - if [ ! -z "$3" ]; then inputBucket=$3; fi - if [ ! -z "$4" ]; then sinkTopic=$4; fi - if [ ! 
-z "$2" ]; then keyword=$2; fi - case "$1" in - "assessment-correction") - echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' - ;; - "assessment-archival") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"azure","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' - ;; - "assessment-archived-removal") -{% if dp_object_store_type == "azure" %} - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' - ;; -{% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' - ;; -{% endif %} - "collection-reconciliation-job") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CollectionReconciliationJob","modelParams":{"mode":"prodrun","brokerList":"{{ingestion_kafka_broker_host}}","topic":"{{env}}.issue.certificate.request","sparkCassandraConnectionHost":"{{ core_cassandra_host }}"},"parallelization":30,"appName":"CollectionReconciliationJob"}' - ;; - "collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host 
}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' - ;; - "score-metric-migration-job") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.ScoreMetricMigrationJob","modelParams":{"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Score Metric Migration Job"}' - ;; - "assessment-score-metric-correction") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.AssessmentScoreCorrectionJob","modelParams":{"assessment.score.correction.batches":"","cassandraReadConsistency":"QUORUM","cassandraWriteConsistency":"QUORUM","csvPath":"/mount/data/analytics/score_correction","isDryRunMode":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":30,"appName":"Assessment Score Correction Job"}' - ;; - "course-batch-status-updater") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' - ;; - "collection-summary-report-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' - ;; - "uci-private-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' - ;; - "uci-response-exhaust") - echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI 
Response Exhaust"}' - ;; - "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' - ;; - "program-collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' - ;; - "response-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' - ;; - "response-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' - ;; - "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' - ;; - 
"progress-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' - ;; - "druid_reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.DruidQueryProcessingModel","modelParams":{"mode":"batch"},"parallelization":8,"appName":"Druid Reports"}' - ;; - "cassandra-migration") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.updater.CassandraMigratorJob","modelParams":{"cassandraDataHost":"{{ core_cassandra_host }}","cassandraMigrateHost":"{{ report_cassandra_host }}","keyspace":"sunbird_courses","cassandraDataTable":"user_enrolments","cassandraMigrateTable":"{{ report_user_enrolment_table }}","repartitionColumns":"batchid"},"parallelization":10,"appName":"Cassandra Migrator","deviceMapping":false}' - ;; - "monitor-job-summ") - echo '{"search":{"type":"local","queries":[{"file":"'$analytics_home'/scripts/logs/joblog.log"}]},"model":"org.ekstep.analytics.model.MonitorSummaryModel","modelParams":{"pushMetrics":true,"brokerList":"'$brokerList'","topic":"'$analyticsMetricsTopic'","model":[{"model":"WorkFlowSummaryModel","category":"consumption","input_dependency":"None"},{"model":"UpdateContentRating","category":"consumption","input_dependency":"None"},{"model":"DruidQueryProcessingModel","category":"consumption","input_dependency":"None"},{"model":"MetricsAuditJob","category":"consumption","input_dependency":"None"},{"model":"StateAdminReportJob","category":"consumption","input_dependency":"None"},{"model":"StateAdminGeoReportJob","category":"consumption","input_dependency":"None"},{"model":"CourseEnrollmentJob","category":"consumption","input_dependency":"None"}]},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"appName":"TestMonitorSummarizer","deviceMapping":true}' - ;; - "job-manager") - echo '{"jobsCount":'$jobManagerJobsCount',"topic":"'$job_topic'","bootStrapServer":"'$brokerList'","zookeeperConnect":"'$zookeeper'","consumerGroup":"jobmanager","slackChannel":"#test_channel","slackUserName":"JobManager","tempBucket":"'$bucket'","tempFolder":"'$temp_folder'"}' - ;; - "wfs") - echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"} }],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' - #echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": 
false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' - ;; - "video-streaming") - echo '{"search":{"type":"{{ dp_object_store_type }}"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}' - ;; - "admin-user-reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' - ;; - "admin-geo-reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' - ;; - "telemetry-replay") - echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' - ;; - "summary-replay") - echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"SummaryReplayJob","deviceMapping":false}' - ;; - "content-rating-updater") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.updater.UpdateContentRating","modelParams": {"startDate": "'$endDate'","endDate": "'$endDate'"},"output": [{"to":"console","params":{"printEvent":false}}],"parallelization": 8,"appName": "Content Rating Updater","deviceMapping": false}' - ;; - "experiment") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.ExperimentDefinitionModel","modelParams":{"sparkElasticsearchConnectionHost":"{{ lp_composite_search_host }}"},"output":[{"to":"elasticsearch","params":{"index":"experiment"}}],"parallelization":8,"appName":"Experiment-Definition","deviceMapping":false}' - ;; - "etb-metrics") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked 
content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' - ;; - "course-enrollment-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' - ;; - "course-consumption-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": "sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "{{ dp_object_store_type 
}}","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' - ;; - "textbook-progress-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' - ;; - "audit-metrics-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"{{ dp_object_store_type 
}}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' - ;; - "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' - ;; - "druid-dataset") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' - ;; - "*") - echo "Unknown model code" - exit 1 # Command to come out of the program with status 1 - ;; - esac -} diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 deleted file mode 100644 index a3569c7f46..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/model-config.json.j2 +++ /dev/null @@ -1,670 +0,0 @@ -{ - "wfs": { - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "unique/raw/", - "endDate": "$(date --date yesterday '+%Y-%m-%d')", - "delta": 0 - } - ] - }, - "filters": [ - { - "name": "actor", - "operator": "ISNOTNULL" - } - ], - "model": "org.ekstep.analytics.model.WorkflowSummary", - "modelParams": { - "apiVersion": "v2", - "parallelization": 32 - }, - "output": [ - { - "to": "{{dp_object_store_type}}", - "params": { - "bucket": "{{ bucket }}", - "key": "{{ job_manager_tmp_dir }}/wfs/$(date --date yesterday '+%Y-%m-%d')" - } - }, - { - "to": "kafka", - "params": { - "brokerList": "{{ brokerlist }}", - "topic": "{{ topic }}" - } - } - ], - "parallelization": 32, - "appName": "Workflow Summarizer", - "deviceMapping": true - }, - "video-streaming": { - "search": { - "type": "{{dp_object_store_type}}" - }, - "model": "org.ekstep.analytics.job.VideoStreamingJob", - "modelParams": { - "maxIterations": 10 - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - } - ], - "parallelization": 8, - "appName": "Video Streaming Job", - "deviceMapping": false - }, - "admin-user-reports": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.job.report.StateAdminReportJob", - "modelParams": { - "sparkCassandraConnectionHost": "{{core_cassandra_host}}", - 
"sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - } - ], - "parallelization": 8, - "appName": "Admin User Reports", - "deviceMapping": false - }, - "admin-geo-reports": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.job.report.StateAdminGeoReportJob", - "modelParams": { - "sparkCassandraConnectionHost": "{{core_cassandra_host}}", - "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - } - ], - "parallelization": 8, - "appName": "Admin Geo Reports", - "deviceMapping": false - }, - "content-rating-updater": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.updater.UpdateContentRating", - "modelParams": { - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date '+%Y-%m-%d')" - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - } - ], - "parallelization": 8, - "appName": "Content Rating Updater", - "deviceMapping": false - }, - "monitor-job-summ": { - "search": { - "type": "local", - "queries": [ - { - "file": "{{ analytics.home }}/scripts/logs/joblog.log" - } - ] - }, - "model": "org.ekstep.analytics.model.MonitorSummaryModel", - "modelParams": { - "pushMetrics": true, - "brokerList": "{{ brokerlist }}", - "topic": "{{ analytics_metrics_topic }}", - "model": [ - { - "model": "WorkFlowSummaryModel", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "UpdateContentRating", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "DruidQueryProcessingModel", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "MetricsAuditJob", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "StateAdminReportJob", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "StateAdminGeoReportJob", - "category": "consumption", - "input_dependency": "None" - }, - { - "model": "CourseEnrollmentJob", - "category": "consumption", - "input_dependency": "None" - } - ] - }, - "output": [ - { - "to": "console", - "params": { - "printEvent": false - } - }, - { - "to": "kafka", - "params": { - "brokerList": "{{ brokerlist }}", - "topic": "{{ topic }}" - } - } - ], - "appName": "TestMonitorSummarizer", - "deviceMapping": true - }, - "experiment": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.ExperimentDefinitionModel", - "modelParams": { - "sparkElasticsearchConnectionHost": "{{ lp_composite_search_host }}" - }, - "output": [ - { - "to": "elasticsearch", - "params": { - "index": "experiment" - } - } - ], - "parallelization": 8, - "appName": "Experiment-Definition", - "deviceMapping": false - }, - "etb-metrics": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.report.ETBMetricsJob", - "modelParams": { - "reportConfig": { - "id": "etb_metrics", - "metrics": [], - "labels": { - "date": "Date", - "identifier": "TextBook ID", - "name": "TextBook Name", - "medium": "Medium", - "gradeLevel": "Grade", - "subject": "Subject", - "createdOn": "Created On", - "lastUpdatedOn": "Last Updated On", - "totalQRCodes": "Total number of QR codes", - "contentLinkedQR": "Number of QR codes with atleast 1 linked content", - "withoutContentQR": "Number of QR codes with no linked content", - "withoutContentT1": "Term 1 QR Codes with no linked content", - "withoutContentT2": "Term 2 QR Codes with no 
linked content", - "status": "Status", - "totalContentLinked": "Total content linked", - "totalQRLinked": "Total QR codes linked to content", - "totalQRNotLinked": "Total number of QR codes with no linked content", - "leafNodesCount": "Total number of leaf nodes", - "leafNodeUnlinked": "Number of leaf nodes with no content", - "l1Name": "Level 1 Name", - "l2Name": "Level 2 Name", - "l3Name": "Level 3 Name", - "l4Name": "Level 4 Name", - "l5Name": "Level 5 Name", - "dialcode": "QR Code", - "sum(scans)": "Total Scans", - "noOfContent": "Number of contents", - "nodeType": "Type of Node", - "term": "Term" - }, - "output": [{ - "type": "csv", - "dims": ["identifier", "channel", "name"], - "fileParameters": ["id", "dims"] - }], - "mergeConfig": { - "frequency": "WEEK", - "basePath": "{{ spark_output_temp_dir }}", - "rollup": 0, - "reportPath": "dialcode_counts.csv", - "postContainer":"{{ reports_container }}" - } - }, - "dialcodeReportConfig": { - "id": "etb_metrics", - "metrics": [], - "labels": {}, - "output": [{ - "type": "csv", - "dims": ["identifier", "channel", "name"], - "fileParameters": ["id", "dims"] - }], - "mergeConfig": { - "frequency": "WEEK", - "basePath": "{{ spark_output_temp_dir }}", - "rollup": 1, - "reportPath": "dialcode_counts.csv", - "rollupAge": "ACADEMIC_YEAR", - "rollupCol": "Date", - "rollupRange": 10, - "postContainer":"{{ reports_container }}" - } - }, - "etbFileConfig": { - "bucket": "{{ reports_container }}", - "file": "dialcode_scans/dialcode_counts.csv" - }, - "druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"},{"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName": "name","aliasName": "name"},{"fieldName": "createdFor","aliasName": "createdFor"},{"fieldName": "createdOn","aliasName": "createdOn"},{"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"},{"fieldName": "board","aliasName": "board"},{"fieldName": "medium","aliasName": "medium"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"},{"fieldName": "subject","aliasName": "subject"},{"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"},{"type": "in","dimension": "status","values": ["Live","Draft","Review"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}}, - "tenantConfig": { - "tenantId": "", - "slugName": "" - }, - "store": "{{dp_object_store_type}}", - "format": "csv", - "key": "druid-reports/", - "filePath": "druid-reports/", - "container": "{{ bucket }}", - "folderPrefix": ["slug", "reportName"] - }, - "output": [{ - "to": "console", - "params": { - "printEvent": false - } - }], - "parallelization": 8, - "appName": "ETB Metrics Model", - "deviceMapping": false - }, - "course-enrollment-report":{ - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.report.CourseEnrollmentJob", - "modelParams": { - "reportConfig": { - "id": "tpd_metrics", - "metrics" : [], - "labels": { - "completionCount": "Completion Count", - "status": "Status", - "enrollmentCount": "Enrollment Count", - "courseName": "Course Name", - "batchName": "Batch Name" - }, - "output": [{ - "type": "csv", - "dims": [] - }] - }, - "esConfig": { - "request": { - "filters":{ - "objectType": ["Content"], - "contentType": ["Course"], - "identifier": [], - "status": ["Live"] - }, - "limit": 10000 - } - }, - "store": "{{dp_object_store_type}}", - "format":"csv", - "key": "druid-reports/", - "filePath": "druid-reports/", - "container": "{{ bucket }}", - "folderPrefix": ["slug", "reportName"], - "sparkCassandraConnectionHost":"{{core_cassandra_host}}", - "sparkElasticsearchConnectionHost":"{{sunbird_es_host}}" - }, - "output": [{ - "to": "console", - "params": { - "printEvent": false - } - }], - "parallelization": 8, - "appName": "TPD Course Enrollment Metrics Model", - "deviceMapping": false - }, - "course-consumption-report":{ - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.report.CourseConsumptionJob", - "modelParams": { - "esConfig": { - "request": { - "filters": { - "objectType": ["Content"], - "contentType": ["Course"], - "identifier": [], - "status": ["Live"] - } - } - }, - "reportConfig": { - "id": "tpd_metrics", - "labels": { - "date": "Date", - "status": "Batch Status", - "timespent": "Timespent in mins", - "courseName": "Course Name", - "batchName": "Batch Name" - }, - "dateRange": { - "staticInterval": "LastDay", - "granularity": "all" - }, - "metrics": [{ - "metric": "totalCoursePlays", - "label": "Total Course Plays (in mins) ", - "druidQuery": { - "queryType": "groupBy", - "dataSource": "summary-events", - "intervals": "LastDay", - "aggregations": [{ - "name": "sum__edata_time_spent", - "type": "doubleSum", - "fieldName": "edata_time_spent" - }], - "dimensions": [{ - "fieldName": "object_rollup_l1", - "aliasName": "courseId" - }, { - "fieldName": "uid", - "aliasName": "userId" - }, { - "fieldName": "context_cdata_id", - "aliasName": "batchId" - }], - "filters": [{ - "type": "equals", - "dimension": "eid", - "value": "ME_WORKFLOW_SUMMARY" - }, { - 
"type": "in", - "dimension": "dimensions_pdata_id", - "values": ["{{ producer_env }}.app", "{{ producer_env }}.portal"] - }, { - "type": "equals", - "dimension": "dimensions_type", - "value": "content" - }, { - "type": "equals", - "dimension": "dimensions_mode", - "value": "play" - }, { - "type": "equals", - "dimension": "context_cdata_type", - "value": "batch" - }], - "postAggregation": [{ - "type": "arithmetic", - "name": "timespent", - "fields": { - "leftField": "sum__edata_time_spent", - "rightField": 60, - "rightFieldType": "constant" - }, - "fn": "/" - }], - "descending": "false" - } - }], - "output": [{ - "type": "csv", - "metrics": ["timespent"], - "dims": [] - }], - "queryType": "groupBy" - }, - "store": "{{dp_object_store_type}}", - "format":"csv", - "key": "druid-reports/", - "filePath": "druid-reports/", - "container": "{{ bucket }}", - "folderPrefix": ["slug", "reportName"], - "sparkCassandraConnectionHost":"{{core_cassandra_host}}", - "sparkElasticsearchConnectionHost":"{{sunbird_es_host}}" - }, - "output": [{ - "to": "console", - "params": { - "printEvent": false - } - }], - "parallelization": 8, - "appName": "TPD Course Consumption Metrics Model", - "deviceMapping": false - }, - "audit-metrics-report": { - "search": { - "type": "none" - }, - "model": "org.ekstep.analytics.model.MetricsAuditJob", - "modelParams": { - "auditConfig": [ - { - "name": "denorm", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "telemetry-denormalized/raw/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - }, - "filters": [ - { - "name": "flags.user_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.content_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.device_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.dialcode_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.collection_data_retrieved", - "operator": "EQ", - "value": true - }, - { - "name": "flags.derived_location_retrieved", - "operator": "EQ", - "value": true - } - ] - }, - { - "name": "failed", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "failed/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - } - }, - { - "name": "unique", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "unique/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - } - }, - { - "name": "raw", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "raw/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - } - }, - { - "name": "channel-raw", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "folder": true, - "bucket": "{{ bucket }}", - "prefix": "channel/*/raw/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')*.json.gz" - } - ] - } - }, - { - "name": "channel-summary", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "folder": true, - "bucket": "{{ bucket }}", - "prefix": "channel/*/summary/", - "startDate": "$(date --date yesterday 
'+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')*.json.gz" - } - ] - } - }, - { - "name": "derived", - "search": { - "type": "{{dp_object_store_type}}", - "queries": [ - { - "bucket": "{{ bucket }}", - "prefix": "derived/wfs/", - "startDate": "$(date --date yesterday '+%Y-%m-%d')", - "endDate": "$(date --date yesterday '+%Y-%m-%d')" - } - ] - } - }, - { - "name": "telemetry-count", - "search": { - "type": "druid", - "druidQuery": { - "queryType": "timeSeries", - "dataSource": "telemetry-events", - "intervals": "LastDay", - "aggregations": [ - { - "name": "total_count", - "type": "count", - "fieldName": "count" - } - ], - "descending": "false" - } - } - }, - { - "name": "summary-count", - "search": { - "type": "druid", - "druidQuery": { - "queryType": "timeSeries", - "dataSource": "summary-events", - "intervals": "LastDay", - "aggregations": [ - { - "name": "total_count", - "type": "count", - "fieldName": "count" - } - ], - "descending": "false" - } - } - } - ] - }, - "output": [ - { - "to": "kafka", - "params": { - "brokerList": "{{ brokerlist }}", - "topic": "{{ metrics_topic }}" - } - } - ], - "parallelization": 8, - "appName": "Metrics Audit" - } -} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 deleted file mode 100644 index f720f4687e..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/model-dock-config.j2 +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash - -config() { - bucket={{ bucket }} - brokerList={{ brokerlist }} - zookeeper={{ zookeeper }} - job_topic={{ analytics_job_queue_topic }} - topic={{ topic }} - sparkCassandraConnectionHost="{{ lp_cassandra_host }}" - sunbirdPlatformCassandraHost="{{ core_cassandra_host }}" - reportPostContainer="{{ reports_container }}" - druidRollupHost="{{ druid_rollup_cluster_ingestion_task_url }}" - - if [ -z "$2" ]; then endDate=$(date --date yesterday "+%Y-%m-%d"); else endDate=$2; fi - if [ ! 
-z "$3" ]; then inputBucket=$3; fi - case "$1" in - "content-details") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.ContentDetailsReport","modelParams":{"tenantId":"","slug":"","reportConfig":{"id":"content_report","metrics":[],"labels":{"programName":"Project Name","programId":"Project ID","contentId":"Content/Question ID","contentName":"Content/Question Name","mimeType":"MimeType","chapterId":"Folder ID","contentStatus":"Content/Question Status","creator":"Creator Name","createdBy":"CreatedBy ID","date":"Date","identifier":"Collection/Question Set ID","name":"Collection/Question Set Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","board":"Board","grade":"Grade","chapters":"Chapter Name","status":"Textbook Status","objectType":"Object Type","primaryCategory":"Primary category","topic":"Topic","learningOutcome":"Learning Outcome","addedFromLibrary":"Added from library","contentType":"Content Type"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"contentQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"unitIdentifiers","aliasName":"unitIdentifiers"},{"fieldName":"collectionId","aliasName":"collectionId"},{"fieldName":"createdBy","aliasName":"createdBy"},{"fieldName":"creator","aliasName":"creator"},{"fieldName":"mimeType","aliasName":"mimeType"},{"fieldName":"topic","aliasName":"topic"},{"fieldName":"learningOutcome","aliasName":"learningOutcome"},{"fieldName":"primaryCategory","aliasName":"contentType"}],"filters":[{"type":"notequals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live"]},{"type":"isnotnull","dimension":"collectionId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"textbookQuery":{"queryType":"groupBy","dataSource":"vdn-content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"programId","aliasName":"programId"},{"fieldName":"identifier","aliasName":"identifier"},{"fieldName":"name","aliasName":"name"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"},{"fieldName":"acceptedContents","aliasName":"acceptedContents"},{"fieldName":"acceptedContributions","aliasName":"acceptedContributions"},{"fieldName":"rejectedContents","aliasName":"rejectedContents"},{"fieldName":"rejectedContributions","aliasName":"rejectedContributions"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"objectType","aliasName":"objectType"},{"fieldName":"reusedContributions","aliasName":"reusedContributions"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper","Exam Question Set","Practice Set","Demo Practice Question 
Set"]},{"type":"isnotnull","dimension":"programId"},{"type":"in","dimension":"status","values":["Draft"]},{"type":"equals","dimension":"channel","value":"channelId"}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{dp_object_store_type}}","storageKeyConfig":"azure_storage_key","storageSecretConfig":"azure_storage_secret","storageContainer":"'$reportPostContainer'","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Content Report Job","deviceMapping":false}' - ;; - "sourcing-summary-report") - echo '{"search": {"type": "none"}, "model": "org.ekstep.analytics.job.report.SourcingSummaryReport", "modelParams": {"storageKeyConfig":"druid_storage_account_key", "storageSecretConfig":"druid_storage_account_secret", "dataSource": "sourcing-summary-snapshot", "druidHost": "'$druidRollupHost'", "druidSegmentUrl":"'$druidRollupHost'/druid/coordinator/v1/metadata/datasources/sourcing-model-snapshot/segments", "deleteSegmentUrl": "'$druidRollupHost'/druid/coordinator/v1/datasources/sourcing-model-snapshot/segments/", "druidIngestionUrl": "'$druidRollupHost'/druid/indexer/v1/task", "specPath": "/mount/data/analytics/scripts/sourcing-ingestion-spec.json", "dbName": "opensaberdb", "tables": {"programTable": "program", "nominationTable": "nomination"}, "druidQuery": {"queryType": "groupBy", "dataSource": "vdn-content-model-snapshot", "intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00", "aggregations": [{"name": "count", "type": "count"}], "dimensions": [{"fieldName": "primaryCategory", "aliasName": "primaryCategory"}, {"fieldName": "createdBy", "aliasName": "createdBy"}], "filters": [{"type": "equals", "dimension": "objectType", "value": "Content"}, {"type": "equals", "dimension": "sampleContent", "value": "false"}], "postAggregation": [], "descending": "false", "limitSpec": {"type": "default", "limit": 1000000, "columns": [{"dimension": "count", "direction": "descending"}]}}, "reportConfig": {"id": "sourcing", "metrics": [], "labels": {}, "output": [{"type": "json", "dims": ["identifier", "channel", "name"], "fileParameters": ["id", "dims"]}]}, "store": "{{dp_object_store_type}}", "format": "json", "folderPrefix": ["slug", "reportName"]}, "output": [{"to": "console", "params": {"printEvent": false}}], "parallelization": 8, "appName": "Sourcing Summary Report Job", "deviceMapping": false}' - ;; - "funnel-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.job.report.FunnelReport","modelParams": {"contributionConfig": {"contentRequest": {"request": {"filters": {"programId": "programIdentifier","objectType": "content","status": ["Draft", "Live", "Review"],"mimeType": "application/vnd.ekstep.content-collection"},"fields": ["acceptedContents", "rejectedContents"],"limit": 10000}},"correctionsPendingRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": "Draft","prevStatus": "Live","programId": "programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}},"contributionRequest": {"request": {"filters": {"objectType": ["content","questionset"],"status": ["Live"],"programId": 
"programIdentifier","mimeType": {"!=": "application/vnd.ekstep.content-collection"},"contentType": {"!=": "Asset"}},"not_exists": ["sampleContent"],"facets": ["createdBy"],"limit": 0}}},"reportConfig": {"id": "funnel_report","metrics": [],"labels": {"reportDate": "Report generation date","visitors": "No. of users opening the project","projectName": "Project Name","initiatedNominations": "No. of initiated nominations","rejectedNominations": "No. of rejected nominations","pendingNominations": "No. of nominations pending review","acceptedNominations": "No. of accepted nominations to the project","noOfContributors": "No. of contributors to the project","noOfContributions": "No. of contributions to the project","pendingContributions": "No. of contributions pending review","approvedContributions": "No. of approved contributions"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"store": "{{dp_object_store_type}}","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","folderPrefix": ["slug", "reportName"]},"sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","druidConfig": {"queryType": "timeseries","dataSource": "telemetry-events-syncts","intervals": "startdate/enddate","aggregations": [{"name": "visitors","type": "count","fieldName": "actor_id"}],"filters": [{"type": "equals","dimension": "context_cdata_id","value": "program_id"}, {"type": "equals","dimension": "edata_pageid","value": "contribution_project_contributions"}, {"type": "equals","dimension": "context_pdata_pid","value": "creation-portal.programs"}, {"type": "equals","dimension": "context_cdata_type","value": "project"}, {"type": "equals","dimension": "context_env","value": "creation-portal"}, {"type": "equals","dimension": "eid","value": "IMPRESSION"}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Funnel Report Job","deviceMapping": false}' - ;; - "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","identifier": "Textbook ID","name": "Textbook Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Chapter Name","totalChapters": "Total number of chapters (first level sections of ToC)","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "equals","dimension": "contentType","value": "TextBook"}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{dp_object_store_type}}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' - ;; - "*") - echo "Unknown model code" - exit 1 # Command to come out of the program with status 1 - ;; - esac -} diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 deleted file mode 100644 index 3a6c969b7b..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/replay-job.j2 +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env bash -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products - -cd {{ analytics.home }}/scripts -source model-config.sh -source replay-utils.sh - -libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" - -if [ "$1" == "telemetry-replay" ] - then - if [ ! $# -eq 5 ] - then - echo "Not sufficient arguments. Killing process" - exit - fi -fi - -get_report_job_model_name(){ - case "$1" in - "assessment-correction") echo 'org.sunbird.analytics.job.report.AssessmentCorrectionJob' - ;; - *) echo $1 - ;; - esac -} - -if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi -if [ -z "$job_config" ]; then job_config=$(config $1 '__endDate__' $4 $5); fi -start_date=$2 -end_date=$3 -backup_key=$1 - -if [ "$1" == "gls-v1" ] - then - backup_key="gls" -elif [ "$1" == "app-ss-v1" ] - then - backup_key="app-ss" -fi - -backup $start_date $end_date {{ bucket }} "derived/$backup_key" "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log" -if [ $? == 0 ] - then - echo "Backup completed successfully..." >> "$DP_LOGS/$end_date-$1-replay.log" - echo "Running the $1 job replay..." >> "$DP_LOGS/$end_date-$1-replay.log" - echo "Job modelName - $job_id" >> "$DP_LOGS/$end_date-$1-replay.log" - $SPARK_HOME/bin/spark-submit --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar --class org.ekstep.analytics.job.ReplaySupervisor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --fromDate "$start_date" --toDate "$end_date" --config "$job_config" >> "$DP_LOGS/$end_date-$1-replay.log" -else - echo "Unable to take backup" >> "$DP_LOGS/$end_date-$1-replay.log" -fi - -if [ $? 
== 0 ] - then - echo "$1 replay executed successfully" >> "$DP_LOGS/$end_date-$1-replay.log" - delete {{ bucket }} "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log" -else - echo "$1 replay failed" >> "$DP_LOGS/$end_date-$1-replay.log" - rollback {{ bucket }} "derived/$backup_key" "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log" - delete {{ bucket }} "derived/backup-$backup_key" >> "$DP_LOGS/$end_date-$1-replay.log" -fi diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 deleted file mode 100644 index 580c3bf29c..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/replay-updater.j2 +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products - -cd {{ analytics.home }}/scripts -source model-config.sh -source replay-utils.sh - -job_config=$(config $1 '__endDate__') -start_date=$2 -end_date=$3 - -echo "Running the $1 updater replay..." >> "$DP_LOGS/$end_date-$1-replay.log" -$SPARK_HOME/bin/spark-submit --master local[*] --jars $MODELS_HOME/analytics-framework-2.0.jar --class org.ekstep.analytics.job.ReplaySupervisor $MODELS_HOME/batch-models-2.0.jar --model "$1" --fromDate "$start_date" --toDate "$end_date" --config "$job_config" >> "$DP_LOGS/$end_date-$1-replay.log" - -if [ $? == 0 ] - then - echo "$1 updater replay executed successfully..." >> "$DP_LOGS/$end_date-$1-replay.log" -else - echo "$1 updater replay failed" >> "$DP_LOGS/$end_date-$1-replay.log" - exit 1 -fi diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 deleted file mode 100644 index 31ead572f3..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/replay-utils.j2 +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -rollback() { - bucket_name=$1 - prefix=$2 - backup_dir=$3 - - src="s3://$bucket_name/$prefix/" - dst="s3://$bucket_name/$backup_dir/" - echo "Copy back the $prefix files to source directory $src from backup directory $dst" - aws s3 cp $dst $src --recursive --include "*" --region ap-south-1 -} - -delete() { - bucket_name=$1 - backup_dir=$2 - - path="s3://$bucket_name/$backup_dir/" - echo "Deleting the back-up files from $path" - aws s3 rm $path --recursive --region ap-south-1 -} - -backup() { - dt_start=$1 - dt_end=$2 - prefix=$4 - bucket_name=$3 - backup_dir=$5 - - ts_start=$(date -d $dt_start +%s) - ts_end=$(date -d $dt_end +%s) - src="s3://$bucket_name/$prefix/" - dst="s3://$bucket_name/$backup_dir/" - - - echo "Backing up the files from $src to $dst for the date range - ($dt_start, $dt_end)" - while [ $ts_start -le $ts_end ] - do - date=`date -d @$ts_start +%F` - aws s3 mv $src $dst --recursive --exclude "*" --include "$date-*" --region ap-south-1 - let ts_start+=86400 - done -} diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 deleted file mode 100644 index e6f1cdf9ad..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/run-dock-job.j2 +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash - -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export 
DP_LOGS={{ analytics.home }}/logs/data-products -## Job to run daily -cd {{ analytics.home }}/scripts -source model-dock-config.sh -today=$(date "+%Y-%m-%d") - -libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" -file_path="dock-{{ env }}.conf" - -get_report_job_model_name(){ - case "$1" in - "funnel-report") echo 'org.sunbird.analytics.sourcing.FunnelReport' - ;; - "sourcing-summary-report") echo 'org.sunbird.analytics.sourcing.SourcingSummaryReport' - ;; - "sourcing-metrics") echo 'org.sunbird.analytics.sourcing.SourcingMetrics' - ;; - "content-details") echo 'org.sunbird.analytics.sourcing.ContentDetailsReport' - ;; - *) echo $1 - ;; - esac -} - -if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi - -if [ ! -z "$1" ]; then job_config=$(config $1); else job_config="$2"; fi - -if [ ! -z "$2" ]; then batchIds=";$2"; else batchIds=""; fi - -echo "Starting the job - $1" >> "$DP_LOGS/$today-job-execution.log" - -echo "Job modelName - $job_id" >> "$DP_LOGS/$today-job-execution.log" - -nohup $SPARK_HOME/bin/spark-submit --conf spark.driver.extraJavaOptions="-Dconfig.file=$MODELS_HOME/$file_path" --conf spark.executor.extraJavaOptions="-Dconfig.file=$MODELS_HOME/$file_path" --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar,$MODELS_HOME/batch-models-2.0.jar --class org.ekstep.analytics.job.JobExecutor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --config "$job_config$batchIds" >> "$DP_LOGS/$today-job-execution.log" 2>&1 - -echo "Job execution completed - $1" >> "$DP_LOGS/$today-job-execution.log" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 deleted file mode 100644 index 26ec84da87..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/run-job.j2 +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env bash - -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products -## Job to run daily -cd {{ analytics.home }}/scripts -source model-config.sh -today=$(date "+%Y-%m-%d") - -libs_path="{{ analytics.home }}/models-{{ model_version }}/data-products-1.0" - -get_report_job_model_name(){ - case "$1" in - "course-enrollment-report") echo 'org.sunbird.analytics.job.report.CourseEnrollmentJob' - ;; - "course-consumption-report") echo 'org.sunbird.analytics.job.report.CourseConsumptionJob' - ;; - "funnel-report") echo 'org.sunbird.analytics.sourcing.FunnelReport' - ;; - "sourcing-metrics") echo 'org.sunbird.analytics.sourcing.SourcingMetrics' - ;; - "admin-geo-reports") echo 'org.sunbird.analytics.job.report.StateAdminGeoReportJob' - ;; - "etb-metrics") echo 'org.sunbird.analytics.job.report.ETBMetricsJob' - ;; - "admin-user-reports") echo 'org.sunbird.analytics.job.report.StateAdminReportJob' - ;; - "userinfo-exhaust") echo 'org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob' - ;; - "response-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJob' - ;; - "response-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2' - ;; - "progress-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJob' - ;; - "progress-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2' - ;; - "cassandra-migration") 
echo 'org.sunbird.analytics.updater.CassandraMigratorJob' - ;; - "collection-summary-report") echo 'org.sunbird.analytics.job.report.CollectionSummaryJob' - ;; - "program-collection-summary-report") echo 'org.sunbird.analytics.job.report.CollectionSummaryJob' - ;; - "collection-summary-report-v2") echo 'org.sunbird.analytics.job.report.CollectionSummaryJobV2' - ;; - "assessment-score-metric-correction") echo 'org.sunbird.analytics.audit.AssessmentScoreCorrectionJob' - ;; - "course-batch-status-updater") echo 'org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob' - ;; - "collection-reconciliation-job") echo 'org.sunbird.analytics.audit.CollectionReconciliationJob' - ;; - "assessment-correction") echo 'org.sunbird.analytics.job.report.AssessmentCorrectionJob' - ;; - "score-metric-migration-job") echo 'org.sunbird.analytics.audit.ScoreMetricMigrationJob' - ;; - "assessment-archival") echo 'org.sunbird.analytics.job.report.AssessmentArchivalJob' - ;; - "assessment-archived-removal") echo 'org.sunbird.analytics.job.report.AssessmentArchivalJob' - ;; - "uci-private-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob' - ;; - "uci-response-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIResponseExhaustJob' - ;; - *) echo $1 - ;; - esac -} - -if [ ! -z "$1" ]; then job_id=$(get_report_job_model_name $1); fi - -if [ ! -z "$1" ]; then job_config=$(config $1 $2); else job_config="$2"; fi - -if [ ! -z "$2" ]; then batchIds=";$2"; else batchIds=""; fi - - -echo "Starting the job - $1" >> "$DP_LOGS/$today-job-execution.log" - -echo "Job modelName - $job_id" >> "$DP_LOGS/$today-job-execution.log" - -nohup $SPARK_HOME/bin/spark-submit --master local[*] --jars $(echo ${libs_path}/lib/*.jar | tr ' ' ','),$MODELS_HOME/analytics-framework-2.0.jar,$MODELS_HOME/scruid_2.12-2.5.0.jar,$MODELS_HOME/batch-models-2.0.jar --class org.ekstep.analytics.job.JobExecutor $MODELS_HOME/batch-models-2.0.jar --model "$job_id" --config "$job_config$batchIds" >> "$DP_LOGS/$today-job-execution.log" 2>&1 - -echo "Job execution completed - $1" >> "$DP_LOGS/$today-job-execution.log" diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 deleted file mode 100644 index 2e613b9866..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/start-jobmanager.j2 +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version}}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products -export SERVICE_LOGS={{ analytics.home }}/logs/services -export JM_HOME={{ analytics.home }}/job-manager - -export azure_storage_key={{sunbird_private_storage_account_name}} -export azure_storage_secret={{sunbird_private_storage_account_key}} -export reports_azure_storage_key={{sunbird_private_storage_account_name}} -export reports_azure_storage_secret={{sunbird_private_storage_account_key}} -export druid_storage_account_key={{sunbird_public_storage_account_name}} -export druid_storage_account_secret={{sunbird_public_storage_account_key}} - -export heap_conf_str={{ spark.heap_conf_str }} -today=$(date "+%Y-%m-%d") - -kill_job_manager() -{ - echo "Killing currently running job-manager process" >> "$SERVICE_LOGS/$today-job-manager.log" - kill $(ps aux | grep 'JobManager' | awk '{print $2}') >> "$SERVICE_LOGS/$today-job-manager.log" -} - -start_job_manager() -{ - kill_job_manager # Before 
starting the job, we kill the currently running job-manager - cd {{ analytics.home }}/scripts - source model-config.sh - job_config=$(config 'job-manager') - echo "Starting the job manager" >> "$SERVICE_LOGS/$today-job-manager.log" - echo "config: $job_config" >> "$SERVICE_LOGS/$today-job-manager.log" - nohup java $heap_conf_str -cp "$SPARK_HOME/jars/*:$MODELS_HOME/*:$MODELS_HOME/data-products-1.0/lib/*" -Dconfig.file=$MODELS_HOME/{{ env }}.conf org.ekstep.analytics.job.JobManager --config "$job_config" >> $SERVICE_LOGS/$today-job-manager.log 2>&1 & - - job_manager_pid=$(ps aux | grep 'JobManager' | awk '{print $2}') # Once the job is started, check whether it is actually running. - if [[ ! -z "$job_manager_pid" ]]; then - echo "Job manager is started." >> "$SERVICE_LOGS/$today-job-manager.log" - else - echo "Job manager is not started." >> "$SERVICE_LOGS/$today-job-manager.log" - fi -} -# Tasks -# Kill the job-manager -# Start the job-manager -# Make sure it is running. -start_job_manager - diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 deleted file mode 100644 index 53c032cd29..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/submit-all-jobs.rb.j2 +++ /dev/null @@ -1,58 +0,0 @@ -require "ruby-kafka" -require 'json' - -@log = File.open("{{ analytics.home }}/logs/logfile.log", 'a') -@kafka = Kafka.new(["{{ kafka_broker_host }}"]) -@topic = "{{ analytics_job_queue_topic }}" -@report_list_jobs_url = "{{ report_list_jobs_url }}" -@submit_jobs_auth_token = "{{ submit_jobs_auth_token }}" -@submit_jobs_command = "source /mount/venv/bin/activate && dataproducts submit_druid_jobs --report_list_jobs_url #{@report_list_jobs_url} --auth_token #{@submit_jobs_auth_token}" - -def log(message) - @log.write("#{Time.now.to_s}: #{message}\n") -end - -def submit_all_jobs - report_jobs = { - "assessment-dashboard-metrics" => "org.sunbird.analytics.job.report.AssessmentMetricsJobV2", - "course-dashboard-metrics" => "org.sunbird.analytics.job.report.CourseMetricsJobV2", - "course-enrollment-report" => "org.sunbird.analytics.job.report.CourseEnrollmentJob", - "course-consumption-report" => "org.sunbird.analytics.job.report.CourseConsumptionJob", - "etb-metrics" => "org.sunbird.analytics.job.report.ETBMetricsJob", - "admin-geo-reports" => "org.sunbird.analytics.job.report.StateAdminGeoReportJob", - "admin-user-reports" => "org.sunbird.analytics.job.report.StateAdminReportJob" - } - jobs = [{{ analytics_job_list }}] - - log("Starting to submit #{jobs.count} jobs for processing") - file = File.read("{{ analytics.home }}/scripts/model-config.json") - file = file.gsub("$(date --date yesterday '+%Y-%m-%d')", `date --date yesterday '+%Y-%m-%d'`.strip) - file = file.gsub("$(date '+%Y-%m-%d')", `date "+%Y-%m-%d"`.strip) - config_hash = JSON.parse(file) - log("Config file loaded") - jobs.each do |job| - if job == "monitor-job-summ" - log("python") - system('/bin/bash -l -c "'+ @submit_jobs_command +'"') - submit_job(job, config_hash[job]) - elsif report_jobs[job].nil? 
- submit_job(job, config_hash[job]) - else - submit_job(report_jobs[job], config_hash[job]) - end - end - - log("Submitted #{jobs.count} jobs for processing") -end - -def submit_job(job, config) - job_config = {model: job, config: config}.to_json - log("message: #{job_config}") - @kafka.deliver_message(job_config, topic: @topic) - log("Submitted #{job} for processing") -end - - - - -submit_all_jobs diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 deleted file mode 100644 index 859cf602c3..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/submit-job.j2 +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -export SPARK_HOME={{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7 -export MODELS_HOME={{ analytics.home }}/models-{{ model_version }} -export DP_LOGS={{ analytics.home }}/logs/data-products -export KAFKA_HOME={{ analytics.soft_path }}/kafka_2.11-0.10.1.0 - -## job broker-list and kafka-topic -job_brokerList={{ brokerlist }} -job_topic={{ analytics_job_queue_topic }} - -## Job to run daily -cd {{ analytics.home }}/scripts -source model-config.sh -today=$(date "+%Y-%m-%d") - -if [ -z "$job_config" ]; then job_config=$(config $1); fi - -echo "Submitted $1 with config $job_config" >> "$DP_LOGS/$today-job-execution.log" -echo '{ "model" :' \"$1\" ',' ' "config": ' "$job_config" '}' >> "$DP_LOGS/$today-job-execution-debug.log" -echo '{ "model" :' \"$1\" ',' ' "config": ' "$job_config" '}' > /tmp/job-request.json -cat /tmp/job-request.json | $KAFKA_HOME/bin/kafka-console-producer.sh --broker-list $job_brokerList --topic $job_topic >> "$DP_LOGS/$today-job-execution.log" 2>&1 diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 deleted file mode 100644 index edd03ff36b..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/submit-script.j2 +++ /dev/null @@ -1,216 +0,0 @@ -#!/usr/bin/env bash - -## Job to run daily - -cd "{{ analytics_cluster.home }}" -source model-config.sh -today=$(date "+%Y-%m-%d") - -while :; do - case $1 in - -j|--job) shift - job="$1" - ;; - -m|--mode) shift - mode="$1" - ;; - -p|--parallelisation) shift - parallelisation=$1 - ;; - -pa|--partitions) shift - partitions=$1 - ;; - -sd|--startDate) shift - start_date=$1 - ;; - -ed|--endDate) shift - end_date=$1 - ;; - -h|--sparkMaster) shift - sparkMaster=$1 - ;; - -sp|--selectedPartitions) shift - selected_partitions=$1 - ;; - *) break - esac - shift -done - -get_report_job_model_name(){ - case "$1" in - "assessment-dashboard-metrics") echo 'org.sunbird.analytics.job.report.AssessmentMetricsJobV2' - ;; - "course-dashboard-metrics") echo 'org.sunbird.analytics.job.report.CourseMetricsJobV2' - ;; - "userinfo-exhaust") echo 'org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob' - ;; - "response-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJob' - ;; - "response-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2' - ;; - "progress-exhaust") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJob' - ;; - "progress-exhaust-v2") echo 'org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2' - ;; - "cassandra-migration") echo 'org.sunbird.analytics.updater.CassandraMigratorJob' - ;; - "uci-private-exhaust") echo 'org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob' - ;; - "uci-response-exhaust") echo 
'org.sunbird.analytics.exhaust.uci.UCIResponseExhaustJob' - ;; - *) echo $1 - ;; - esac -} - -submit_cluster_job() { - # add batch number to config - echo "Running for batch number $i" - batchNumberString="\\\"modelParams\\\":{\\\"batchNumber\\\":$i," - job_config=$(config $job) - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"modelParams\":{'/$batchNumberString} - echo $finalConfig - echo "Running $job as parallel jobs" - classVariable="org.ekstep.analytics.job.JobExecutor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" - argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" - clusterConfig=`cat cluster-config.json` - requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} - finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} - echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} - echo "Submitted job for batchNumber $i; below is the response" - echo $response -} - -job_id=$(get_report_job_model_name $job) - -if [ -z "$sparkMaster" ]; then sparkMaster="local[*]"; else sparkMaster="$sparkMaster"; fi - -if [ "$mode" = "via-partition" ]; then - endPartitions=`expr $partitions - 1` - if [ -z "$parallelisation" ]; then parallelisation=1; else parallelisation=$parallelisation; fi - # add partitions to config and start jobs - for i in $(seq 0 $parallelisation $endPartitions) - do - # add partitions to config - partitionString="\\\"delta\\\":0,\\\"partitions\\\":[$(seq -s , $i `expr $i + $parallelisation - 1`)]" - if [ -z "$start_date" ]; then - job_config=$(config $job) - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} - echo $finalConfig - echo "Running $job by partitions." - classVariable="org.ekstep.analytics.job.JobExecutor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" - else - job_config=$(config $job '__endDate__') - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} - echo $finalConfig - echo "Running $job by partitions via Replay-Supervisor." 
- classVariable="org.ekstep.analytics.job.ReplaySupervisor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" - fi - argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" - clusterConfig=`cat cluster-config.json` - requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} - finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} - echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} - done - -elif [ "$mode" = "parallel-jobs" ]; then - # add batch number to config and submit jobs - echo "inside parallel-jobs block" - echo $parallelisation - if [ $parallelisation -ge 1 ]; then - for i in $(seq 1 $parallelisation) - do - submit_cluster_job $i & - done - else echo "No requests found in table"; fi - -elif [ "$mode" = "selected-partition" ]; then - # add partitions to config - partitionString="\\\"delta\\\":0,\\\"partitions\\\":[$selected_partitions]" - if [ -z "$start_date" ]; then - job_config=$(config $job) - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} - echo $finalConfig - echo "Running $job by partitions." - classVariable="org.ekstep.analytics.job.JobExecutor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\"]" - else - job_config=$(config $job '__endDate__') - cluster_job_config=${job_config//'"'/'\"'} - finalConfig=${cluster_job_config/'\"delta\":0'/$partitionString} - echo $finalConfig - echo "Running $job by partitions via Replay-Supervisor." - classVariable="org.ekstep.analytics.job.ReplaySupervisor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$finalConfig\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" - fi - argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" - clusterConfig=`cat cluster-config.json` - requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} - finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} - echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} -else - if [ -z "$start_date" ]; then - echo "Running $job without partition via run-job." 
- job_config=$(config $job) - cluster_job_config=${job_config//'"'/'\"'} - classVariable="org.ekstep.analytics.job.JobExecutor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$cluster_job_config\"]" - else - job_config=$(config $job '__endDate__') - cluster_job_config=${job_config//'"'/'\"'} - echo "Running $job without partition via Replay-Supervisor." - classVariable="org.ekstep.analytics.job.ReplaySupervisor" - argsList="\"args\": [\"--model\", \"$job_id\", \"--config\", \"$cluster_job_config\", \"--fromDate\", \"$start_date\", \"--toDate\", \"$end_date\"]" - fi - argsStr="\"className\": \"org.ekstep.analytics.job.JobExecutor\", $argsList" - echo $argsStr - clusterConfig=`cat cluster-config.json` - requestBody=${clusterConfig/'"className": "org.ekstep.analytics.job.JobExecutor"'/$argsStr} - finalRequestBody=${requestBody/'org.ekstep.analytics.job.JobExecutor'/$classVariable} - echo $finalRequestBody -{% if dp_object_store_type == "azure" %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ spark_cluster_name }}.azurehdinsight.net/livy/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% elif (dp_object_store_type == "oci") %} -{ - curl -k --user "{{ admin_name }}:{{ admin_password }}" -v -H "Content-Type: application/json" -X POST -d "$finalRequestBody" 'https://{{ bds_cluster_name }}un0.newbds.{{ vcn_name }}.oraclevcn.com:8998/batches' -H "X-Requested-By: {{ admin_name }}" -} -{% endif %} - -fi diff --git a/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 deleted file mode 100644 index cfd986b008..0000000000 --- a/ansible/roles/data-products-deploy-oci-bds/templates/update-job-requests.py.j2 +++ /dev/null @@ -1,119 +0,0 @@ -from __future__ import division -import math -import psycopg2 -import sys -import pandas as pd -from IPython.display import display -from psycopg2 import sql, connect -import json - - -def updateExhaustRequests(db, table, update_list): - for r in update_list: - cursor = db.cursor() - batchNum = r['batch_number'] - requestId = r['request_id'] - insertQry = "UPDATE {0} SET batch_number = {1} WHERE request_id = '{2}'".format(table, batchNum, requestId) - n = cursor.execute(insertQry) - -def updateDruidRequests(db, table, update_list): - for r in update_list: - cursor = db.cursor() - batchNum = r['batch_number'] - reportId = r['report_id'] - insertQry = "UPDATE {0} SET batch_number = {1} WHERE report_id = '{2}'".format(table, batchNum, reportId) - n = cursor.execute(insertQry) - -def processRequests(totalRequestsDf, jobId, batchSize, db, table,jobType): - # Compute parallelism from batchSize & totalRequests - # update batch_number to table - - totalRequests = len(totalRequestsDf.index) - print("totalRequests {0}".format(totalRequests)) - - parallelism = int(math.ceil(totalRequests/batchSize)) - print("parallelism computed {0}".format(parallelism)) - - if totalRequests > 0: - if jobType == 'exhaust': - totalRequestsDf["row_num"] = totalRequestsDf.groupby(by=['job_id'])['request_id'].transform(lambda x: x.rank()) - else: - totalRequestsDf["row_num"] = totalRequestsDf['report_id'].transform(lambda x: x.rank()) - #display(totalRequestsDf) - - start_index = 1 - end_index = batchSize - for i in range(1, parallelism+1): - subSetDf = totalRequestsDf[(totalRequestsDf['row_num'] >= start_index) & (totalRequestsDf['row_num'] <= end_index)] - 
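# Worked example of the windowing below (numbers are illustrative): with
# totalRequests = 10 and batchSize = 4, parallelism = ceil(10/4) = 3, so the
# rank-based windows are rows 1-4 -> batch 1, rows 5-8 -> batch 2 and
# rows 9-10 -> batch 3; each slice is stamped with its batch_number and then
# written back through the UPDATE helpers defined above.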
subSetDf["batch_number"] = i - print(start_index,end_index) - if jobType == 'exhaust': - updateExhaustRequests(db, table, json.loads(subSetDf.to_json(orient='records'))) - else: - updateDruidRequests(db, table, json.loads(subSetDf.to_json(orient='records'))) - start_index = 1 + end_index - end_index = end_index + batchSize - db.commit() - db.close() - return parallelism - else: - return 0 - -def postgresql_to_dataframe(db, select_query, column_names): - cursor = db.cursor() - try: - cursor.execute(select_query) - except (Exception, psycopg2.DatabaseError) as error: - print("Error: %s" % error) - return 1 - - tupples = cursor.fetchall() - - df = pd.DataFrame(tupples, columns=column_names) - #display(df) - return df - -def get_columns_names(db,table): - columns = [] - col_cursor = db.cursor() - col_names_str = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS WHERE " - col_names_str += "table_name = '{}';".format( table ) - try: - sql_object = sql.SQL(col_names_str).format(sql.Identifier( table)) - col_cursor.execute( sql_object ) - col_names = (col_cursor.fetchall()) - for tup in col_names: - columns += [ tup[0] ] - col_cursor.close() - except Exception as err: - print ("get_columns_names ERROR:", err) - return columns - -def main(batchSize, jobId,jobType,table): - host="{{postgres.db_url}}" - port={{postgres.db_port}} - user="{{postgres.db_username}}" - password="{{postgres.db_password}}" - database="{{postgres.db_name}}" - url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database) - - db = psycopg2.connect(host=host, user=user, password=password, database=database, port=port) - - column_names = get_columns_names(db, table) - - if jobType == 'exhaust': - jobId = jobId.split("-v2")[0] if "-v2" in jobId else jobId - selectQuery = "select * from {0} where job_id = '{1}' and status IN ('SUBMITTED', 'FAILED') and iteration < 3;".format(table, jobId) - else: - selectQuery = "select * from {0} where status IN ('ACTIVE')".format(table) - df = postgresql_to_dataframe(db, selectQuery, column_names) - - parallelism = processRequests(df, jobId, batchSize, db, table,jobType) - return parallelism - -batchSize =int(sys.argv[2]) -jobId=sys.argv[1] -jobType = sys.argv[3] -table = sys.argv[4] -parallelism = main(batchSize, jobId,jobType,table) -print("returning parallelism value: {0}".format(parallelism)) From 5ebf3298b3f83bee83e1b1ec05ae2d8394a19687 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 15:32:47 +1000 Subject: [PATCH 100/203] updated spark env Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 4 ++-- .../data-products-deploy/templates/update-job-requests.py.j2 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index ee6b40c1cb..d26896b348 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -434,10 +434,10 @@ shell: | if echo "{{jobs}}" | grep 'druid' then - python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{env}}_report_config + python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} druid {{spark_env}}_report_config elif echo "{{jobs}}" | grep 'exhaust' then - python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size }} exhaust {{env}}_job_request + python {{ analytics_cluster.home }}/update-job-requests.py {{ jobs }} {{ batch_size 
}} exhaust {{spark_env}}_job_request fi tags: - parallel-jobs-submit diff --git a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 index cfd986b008..4085041965 100644 --- a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 +++ b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 @@ -94,7 +94,7 @@ def main(batchSize, jobId,jobType,table): port={{postgres.db_port}} user="{{postgres.db_username}}" password="{{postgres.db_password}}" - database="{{postgres.db_name}}" + database="{{postgres.spark_db_name}}" url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database) db = psycopg2.connect(host=host, user=user, password=password, database=database, port=port) From d584d8f0ff7d72e3b1884bf3ecae127707b1e510 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 15:37:24 +1000 Subject: [PATCH 101/203] python debug Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index d26896b348..57acbbe83d 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -430,6 +430,18 @@ tags: - parallel-jobs-submit +- name: Check python version + shell: python --version + register: python_version + tags: + - parallel-jobs-submit + +- name: Check python version + debug: | + msg: {{python_version.stdout}} + tags: + - parallel-jobs-submit + - name: Execute python script to populate batch numbers shell: | if echo "{{jobs}}" | grep 'druid' From 4c152bdebd6325f56d6724a08a9bc01e61248a21 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 16:22:36 +1000 Subject: [PATCH 102/203] updated model to include storage keys Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 12 ------------ .../data-products-deploy/templates/model-config.j2 | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 57acbbe83d..d26896b348 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -430,18 +430,6 @@ tags: - parallel-jobs-submit -- name: Check python version - shell: python --version - register: python_version - tags: - - parallel-jobs-submit - -- name: Check python version - debug: | - msg: {{python_version.stdout}} - tags: - - parallel-jobs-submit - - name: Execute python script to populate batch numbers shell: | if echo "{{jobs}}" | grep 'druid' diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 86f376b65d..89ca4128c4 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", 
"sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 6ae6963cf1dc3eccc867a6a69a85a6703bd3a0f2 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 10 Apr 2023 17:32:25 +1000 Subject: [PATCH 103/203] updated spark db name Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index e0ec7005df..9ee857590a 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -223,7 +223,7 @@ metric.kafka.broker="{{groups['processing-cluster-kafka']|join(':9092,')}}:9092" metric.kafka.topic="{{ env }}.prom.monitoring.metrics" //Postgres Config -postgres.db="{{postgres.db_name}}" +postgres.db="{{postgres.spark_db_name}}" postgres.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" postgres.user="{{postgres.db_username}}" postgres.pass="{{postgres.db_password}}" From 47946eb56daddaf5a82fc0eb8381e0d2d7b780c7 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 11 Apr 2023 09:46:56 +1000 Subject: [PATCH 104/203] disabled verbose logging for ansible Signed-off-by: Deepak Devadathan --- pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel index dad65d4e73..480e880609 100644 --- a/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel +++ b/pipelines/deploy/spark-cluster-deploy/Jenkinsfile.parallel @@ -26,7 +26,7 @@ node() { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/spark-cluster-job-submit.yml" - ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} oci_install_loc=${params.oci_install_loc} batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type} -vvvv " + ansibleExtraArgs = "--vault-password-file /var/lib/jenkins/secrets/vault-pass --extra-vars \"jobs=${params.jobs_to_submit} oci_install_loc=${params.oci_install_loc} batch_size=${params.batch_size}\" --tags config-update,${params.jobs_submit_type}" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From 341ef4fd11aab68e27fa31b18b02950d339b99bc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 11 Apr 2023 18:09:09 +1000 Subject: [PATCH 105/203] updated model parameters for userinfo-exhaust Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 89ca4128c4..9719dae2a7 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -68,7 +68,7 @@ config() { echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' ;; "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date 
--date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' ;; "program-collection-summary-report") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' From 11d111c825e501180bac2e44b66c1ad30ca2aa0c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 11 Apr 2023 18:16:52 +1000 Subject: [PATCH 106/203] added storage key and secret for response-exhaust-v2 Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 9719dae2a7..4efbd29bc9 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -77,7 +77,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' ;; "response-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", 
"sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' From dfde1795e4d5bedffd1fbddc0a1f39e881c475e1 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 13 Apr 2023 16:39:38 +1000 Subject: [PATCH 107/203] added bucket as a parameter Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index 3deb57cc6a..e2903ca267 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -20,7 +20,7 @@ node('build-slave') { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} bucket=${params.bucket} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From 4a4de7e9569a57d42895a07a253e281ab2a527dd Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 13 Apr 2023 17:33:22 +1000 Subject: [PATCH 108/203] added the bucket name to the sprk provisioning script Signed-off-by: Deepak Devadathan --- pipelines/provision/spark/Jenkinsfile.bds | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index e2903ca267..1b754d1678 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -33,6 +33,7 @@ node('build-slave') { } stage('create and provision spark OCI BDS') { oci_namespace=params.oci_namespace + bucket=params.bucket withCredentials([usernamePassword(credentialsId: 'oci-bds-credential', passwordVariable: 'cluster_password', usernameVariable: 'ambari_user')]) { sh ''' 
currentws=$(pwd) @@ -40,7 +41,7 @@ node('build-slave') { cd /tmp ./create-cluster.sh $ambari_user $cluster_password export ANSIBLE_HOST_KEY_CHECKING=False - ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass + ansible-playbook -i $currentws/ansible/inventory/env $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace bucket=$bucket" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass ''' } From 640ff44deab11987652d2a81d0691154101d6771 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 13 Apr 2023 23:22:05 +1000 Subject: [PATCH 109/203] for testing added batchmodel in jars list Signed-off-by: Deepak Devadathan --- .../data-products-deploy/templates/cluster-config.json.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 12ebf0bde0..2f5ee03ca8 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -60,7 +60,8 @@ "jars": [ "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ scruid_artifact }}", - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}", + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}" ], "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", "files": [ From 36d96d5be138aa27978db43867b2ccf95e24c705 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 14 Apr 2023 20:49:42 +1000 Subject: [PATCH 110/203] testing change Signed-off-by: Deepak Devadathan --- .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 2f5ee03ca8..d8dc0eb033 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -80,6 +80,8 @@ "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", + "spark.executor.userClassPathFirst: true", + "spark.driver.userClassPathFirst: true", "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name 
}} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } From 2fa3df190c4c03b2ce0561991f6c64ac4c91f946 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 14 Apr 2023 20:57:25 +1000 Subject: [PATCH 111/203] corrected json Signed-off-by: Deepak Devadathan --- .../data-products-deploy/templates/cluster-config.json.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index d8dc0eb033..ea641acdc5 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -80,8 +80,8 @@ "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.executor.userClassPathFirst: true", - "spark.driver.userClassPathFirst: true", + "spark.executor.userClassPathFirst": "true", + "spark.driver.userClassPathFirst": "true", "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml 
-Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } From 444108df96b83e63ecef75a08ab3b17c75b6e0ee Mon Sep 17 00:00:00 2001 From: subhash_chandra_budde Date: Fri, 14 Apr 2023 22:36:31 +0530 Subject: [PATCH 112/203] removed the classpath params --- .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 -- 1 file changed, 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index ea641acdc5..2f5ee03ca8 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -80,8 +80,6 @@ "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.executor.userClassPathFirst": "true", - "spark.driver.userClassPathFirst": "true", "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ 
sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } From d53cdbd8a4b39a6af7b95c141d7a12c1a17aee6f Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sun, 16 Apr 2023 01:52:15 +1000 Subject: [PATCH 113/203] added model params storageContainer for progress exhaust Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 4efbd29bc9..9690ac0330 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date 
yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From de04dd91ff8c8b954aca8052d8c64e4e10e9f9ce Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sun, 16 Apr 2023 03:08:04 +1000 Subject: [PATCH 114/203] added store as s3 for testing Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 9690ac0330..f7edaf85c1 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ 
core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 3be25300f439187f92f0c7d41fdfe9f3870db473 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sun, 16 Apr 2023 03:23:10 +1000 Subject: [PATCH 115/203] corrected json syntax for progress exhaust Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index f7edaf85c1..fc42846675 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200},"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host 
}}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 4325d723422dc066028943feec987b7c4a58479a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sun, 16 Apr 2023 04:21:23 +1000 Subject: [PATCH 116/203] removed store from model params for progress exhust Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index fc42846675..77b50d6762 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host 
}}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 3df3cbc399ff994dd65ac442b4ad349c9b3c3394 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Sun, 16 Apr 2023 23:27:05 +1000 Subject: [PATCH 117/203] added store as s3 for progress exhaust Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 77b50d6762..fc42846675 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, 
"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 70fc149966616eb5ffdc493fb38d43219fbb0f69 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 17 Apr 2023 00:07:07 +1000 Subject: [PATCH 118/203] added store key and secret placeholders Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index fc42846675..41e6dc7313 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From afec78c11f13967921c4cbdb82f33dcd6a052bfc Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 17 Apr 2023 06:45:35 +1000 Subject: [PATCH 119/203] updated model-config for endpoint addition Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 41e6dc7313..8c30e4cb75 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}","storageEndpoint":"cloud_storage_endpoint_with_protocol", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date 
From 9367598d23f8ac17b0ff50b9a6ffb8789191f9d9 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 17 Apr 2023 06:51:12 +1000
Subject: [PATCH 120/203] variablized store type

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2
index 8c30e4cb75..e6bf1bca56 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.j2
@@ -80,7 +80,7 @@ config() {
         echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}'
         ;;
     "progress-exhaust")
-        echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"s3","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}","storageEndpoint":"cloud_storage_endpoint_with_protocol", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}'
+        echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}","storageEndpoint":"cloud_storage_endpoint_with_protocol", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}'
         ;;
     "progress-exhaust-v2")
         echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}'
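With the endpoint and store type now templated (patches 119/120), one way to guard these long single-line payloads against quoting mistakes is to render the template, source it, and parse each job's output. A sketch under the assumption that the rendered script exposes config() dispatching on its first argument and that jq is installed; the sed substitution only makes the embedded $(date ...) fragments parseable:

    #!/usr/bin/env bash
    # Sketch only: validate that each echo'd model config is well-formed JSON.
    set -euo pipefail
    source /tmp/model-config.sh   # hypothetical rendered copy of model-config.j2

    for job in progress-exhaust progress-exhaust-v2 response-exhaust-v2; do
      payload=$(config "$job" | sed -E 's/\$\(date [^)]*\)/2023-04-17/g')
      if printf '%s\n' "$payload" | jq -e . >/dev/null 2>&1; then
        echo "OK   $job"
      else
        echo "FAIL $job: payload is not valid JSON" >&2
      fi
    done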
}}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 7f717dbf39bc6c6f6de68118b0c7b8208679773d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 17 Apr 2023 07:42:10 +1000 Subject: [PATCH 121/203] updated model config for progress exhaust Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index e6bf1bca56..bec9c69a42 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"reports.storage.key.config", "storageSecretConfig":"reports.storage.secret.config","storageContainer":"{{secor_bucket}}","storageEndpoint":"cloud_storage_endpoint_with_protocol", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}","storageEndpoint":"{{dp_storage_endpoint_config}}", 
"apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From 71531b4f57cb2ec44ad61a1dfcfb5a3ac8c59c31 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 17 Apr 2023 12:31:26 +1000 Subject: [PATCH 122/203] changed the report container Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index bec9c69a42..34e5b01aa3 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{secor_bucket}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From f63f5b9e267210067c688f00a624de0bd15ebf5b Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 14:41:47 +1000 Subject: [PATCH 123/203] updated progress-exhaust config Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 34e5b01aa3..4257d79cb9 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -80,7 +80,7 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", 
"sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config", "storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' From eb653b271324eaabc6f036858aa3760f67a8f033 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 18:43:33 +1000 Subject: [PATCH 124/203] updated model config for userinfo Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index 4257d79cb9..baccd205bb 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -68,7 +68,7 @@ config() { echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' ;; "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday 
'+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config", "storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' ;; "program-collection-summary-report") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' From 8ffb87f2e66f4328c37b855e68d388e3ac4aca1a Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 22:16:43 +1000 Subject: [PATCH 125/203] added jets3t properties Signed-off-by: Deepak Devadathan --- .../data-products-deploy/templates/cluster-config.json.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 2f5ee03ca8..bdefbd27a9 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -65,7 +65,8 @@ ], "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", "files": [ - "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf" + "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf", + "/usr/odh/2.0.1/spark/conf/jets3t.properties" ], "className": "org.ekstep.analytics.job.JobExecutor", "executorCores": {{ spark_cluster.executor_core }}, From 46f5d8ab424c56cce86f2884db7abc9c1d4ebe64 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 18 Apr 2023 22:24:43 +1000 Subject: [PATCH 126/203] updated the list of files Signed-off-by: Deepak Devadathan --- .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index bdefbd27a9..1af3854ec2 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ 
From 729f6fa366f3d029a302526439962a0cda38ec29 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Tue, 18 Apr 2023 22:28:38 +1000
Subject: [PATCH 127/203] added local jets3t files

Signed-off-by: Deepak Devadathan
---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 1af3854ec2..3b9910f6cf 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -66,7 +66,7 @@
     "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}",
     "files": [
       "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf",
-      "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/jets3t.properties"
+      "local:///usr/odh/2.0.1/spark/conf/jets3t.properties"
     ],
     "className": "org.ekstep.analytics.job.JobExecutor",
     "executorCores": {{ spark_cluster.executor_core }},

From 7cb0be4df161c566bc84d5dbaab263042d6d3768 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Tue, 18 Apr 2023 22:33:43 +1000
Subject: [PATCH 128/203] added files for jets3t

Signed-off-by: Deepak Devadathan
---
 .../roles/data-products-deploy/templates/cluster-config.json.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 3b9910f6cf..1af3854ec2 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -66,7 +66,7 @@
     "file": "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}",
     "files": [
       "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf",
-      "local:///usr/odh/2.0.1/spark/conf/jets3t.properties"
+      "oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/jets3t.properties"
    ],
     "className": "org.ekstep.analytics.job.JobExecutor",
     "executorCores": {{ spark_cluster.executor_core }},
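Patches 127/128 go back and forth between shipping jets3t.properties from the local ODH install and from Object Storage; the oci:// variant only works once the object has actually been uploaded (patch 131 below adds that upload task). A pre-flight check, with placeholder namespace/bucket/prefix values:

    # Sketch only: confirm the object exists before Livy tries to localize it.
    oci os object head \
      --namespace mynamespace \
      --bucket-name mybucket \
      --name "models-2.0/jets3t.properties" \
      && echo "jets3t.properties present in OSS"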
From 988b7bb5dd4d0f53b222ec43789d2155ccead330 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 19 Apr 2023 09:59:24 +1000
Subject: [PATCH 129/203] updated model config for userinfo-exhaust

Signed-off-by: Deepak Devadathan
---
 .../data-products-deploy/templates/cluster-config.json.j2    | 4 ++--
 ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 1af3854ec2..49acc70781 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -81,8 +81,8 @@
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
-    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}",
-    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}"
+    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }}",
+    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }}"
   }
 }
 {% endif %}
\ No newline at end of file

diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2
index baccd205bb..bb08633a47 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.j2
@@ -68,7 +68,7 @@ config() {
         echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}'
         ;;
     "userinfo-exhaust")
-        echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config", "storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}'
+        echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageContainer":"{{reports_container}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}'
         ;;
     "program-collection-summary-report")
         echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}'
State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' From b84e7eaa1eed9c8715b03af01040602c11bd1edd Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 12:13:47 +1000 Subject: [PATCH 130/203] updated for flexible sizing Signed-off-by: Deepak Devadathan --- .../oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 6 +++--- pipelines/provision/spark/Jenkinsfile.bds | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index 2e880a92d2..fea3bd313e 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -246,17 +246,17 @@ function create_cluster() { for i in `seq 1 $master` do - json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{head_node_memory}}", \"ocpus\": "{{head_node_cpu}}"},\"subnetId\": \"$subnet\" }" done for i in `seq 1 $utility` do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{head_node_memory}}", \"ocpus\": "{{head_node_cpu}}"},\"subnetId\": \"$subnet\" }" done for i in `seq 1 $worker` do - json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 16, \"ocpus\": 3},\"subnetId\": \"$subnet\" }" + json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{worker_node_memory}}", \"ocpus\": "{{worker_node_cpu}}"},\"subnetId\": \"$subnet\" }" done json="$json]" diff --git a/pipelines/provision/spark/Jenkinsfile.bds b/pipelines/provision/spark/Jenkinsfile.bds index 1b754d1678..418d5ded0a 100644 --- a/pipelines/provision/spark/Jenkinsfile.bds +++ b/pipelines/provision/spark/Jenkinsfile.bds @@ -20,7 +20,7 @@ node('build-slave') { jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() currentWs = sh(returnStdout: true, script: 'pwd').trim() ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} key_alias=${params.key_alias} user_id=${params.user_id} bucket=${params.bucket} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" + 
ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} head_node_cpu=${params.head_node_cpu} worker_node_cpu=${params.worker_node_cpu} head_node_memory=${params.head_node_memory} worker_node_memory=${params.worker_node_memory} key_alias=${params.key_alias} user_id=${params.user_id} bucket=${params.bucket} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.cluster_state}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" values.put('currentWs', currentWs) values.put('env', envDir) values.put('module', module) From 054e5c426e4e8eec64334e7c9ab4b81946dbc73c Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 19 Apr 2023 12:36:42 +1000 Subject: [PATCH 131/203] added jets3t properties for spark cluster run Signed-off-by: Deepak Devadathan --- .../data-products-deploy/defaults/main.yml | 10 +++++++++- .../roles/data-products-deploy/tasks/main.yml | 18 ++++++++++++++++++ .../data-products-deploy/templates/jets3t.j2 | 8 ++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 ansible/roles/data-products-deploy/templates/jets3t.j2 diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml index 38ddd70420..fec9fc0816 100755 --- a/ansible/roles/data-products-deploy/defaults/main.yml +++ b/ansible/roles/data-products-deploy/defaults/main.yml @@ -280,4 +280,12 @@ assessment_metric_primary_category: "{{ exhaust_job_assessment_primary_category # Default s3 variables sunbird_private_s3_storage_key: "" -sunbird_private_s3_storage_secret: "" \ No newline at end of file +sunbird_private_s3_storage_secret: "" + + +# jets3t s3 config, allows us to configure for s3-like object stores +jets3t_s3_request_signature_version: "{{ s3_request_signature_version }}" +jets3t_s3_endpoint_host: "{% if s3_storage_endpoint %}{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}{% endif %}" +jets3t_s3_disable_dns_buckets: "{{ s3_path_style_access }}" +jets3t_s3_https_only: "{{ s3_https_only }}" +jets3t_s3_default_bucket_location: "{{ s3_default_bucket_location }}" \ No newline at end of file diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index d26896b348..1db6a6cc3b 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -393,6 +393,24 @@ - run-job - config-update +- name: Copy jets3t.properties file + template: src=jets3t.j2 dest={{ analytics_cluster.home }}/jets3t.properties + delegate_to: localhost + tags: + - replay-job + - run-job + - config-update + +- name: Copy JetS3t.properties to oci oss + command: "{{oci_install_loc}}/oci os object put -bn {{ bucket }} --name models-{{ model_version }}/jets3t.properties --file {{ analytics_cluster.home }}/jets3t.properties --content-type auto --force" + async: 3600 + poll: 10 + when: dp_object_store_type == "oci" + tags: + - replay-job + - run-job + - config-update + - name: Replay Job shell: "nohup {{ analytics_cluster.home }}/submit-script.sh --job {{ job_id }} --mode {{ mode }} --partitions {{ partitions }} --parallelisation {{ parallelisation }} --startDate {{ start_date }} --endDate {{ end_date }} --sparkMaster {{ sparkMaster }} --selectedPartitions {{ selected_partitions }} &" async: "{{ (pause_min * 60) }}" diff --git 
From 37ed4118c97784001a09407ed49fd0da4b65d1c8 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 19 Apr 2023 12:46:25 +1000
Subject: [PATCH 132/203] corrected jinja syntax

Signed-off-by: Deepak Devadathan
---
 .../oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2
index fea3bd313e..ce84928ce1 100644
--- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2
+++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2
@@ -246,17 +246,17 @@ function create_cluster() {
 
     for i in `seq 1 $master`
     do
-        json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{head_node_memory}}", \"ocpus\": "{{head_node_cpu}}"},\"subnetId\": \"$subnet\" }"
+        json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": {{head_node_memory}}, \"ocpus\": {{head_node_cpu}}},\"subnetId\": \"$subnet\" }"
     done
 
     for i in `seq 1 $utility`
     do
-        json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{head_node_memory}}", \"ocpus\": "{{head_node_cpu}}"},\"subnetId\": \"$subnet\" }"
+        json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"UTILITY\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": {{head_node_memory}}, \"ocpus\": {{head_node_cpu}}},\"subnetId\": \"$subnet\" }"
     done
 
     for i in `seq 1 $worker`
     do
-        json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": "{{worker_node_memory}}", \"ocpus\": "{{worker_node_cpu}}"},\"subnetId\": \"$subnet\" }"
+        json="$json,{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"WORKER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": {{worker_node_memory}}, \"ocpus\": {{worker_node_cpu}}},\"subnetId\": \"$subnet\" }"
     done
 
     json="$json]"

From 021c239d3df45651444bf322cc5fb8c67222454e Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 19 Apr 2023 13:26:06 +1000
Subject: [PATCH 133/203] added download jets3t properties

Signed-off-by: Deepak Devadathan
---
 ansible/roles/provision-oci-spark-cluster/tasks/main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml
index 9df37915bd..9f766b2d65 100644
--- a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml
+++ b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml
@@ -73,6 +73,8 @@
 - name: Download config to livy
   command: hdfs dfs -get -f oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf {{ spark_folder }}/conf/application.conf
 
+- name: Download jets3t config to livy
+  command: hdfs dfs -get -f oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/jets3t.properties {{ spark_folder }}/conf/jets3t.properties
 
 
 - name: Update log4j.properties
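The quoting cleanup in patch 132 matters because the extra double quotes around the Jinja expressions end and re-open the shell's double-quoted string, which is fragile (it breaks silently if a value is empty or contains spaces). A minimal sanity check with invented values, building one node entry the same way create-cluster.sh.j2 does after rendering and confirming the assembled list parses:

    # Sketch only: assemble a node-list fragment and validate it with jq.
    subnet="ocid1.subnet.oc1..example"   # placeholder OCID
    json="["
    json="$json{\"blockVolumeSizeInGBs\": 1000,\"nodeType\": \"MASTER\",\"shape\": \"VM.Standard.E4.Flex\",\"shapeConfig\": { \"memoryInGBs\": 32, \"ocpus\": 3},\"subnetId\": \"$subnet\" }"
    json="$json]"
    printf '%s\n' "$json" | jq -e . >/dev/null && echo "node JSON parses"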
From e8230839264cdd6522f5ee12c6d1e56076a6c70f Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 19 Apr 2023 13:46:45 +1000
Subject: [PATCH 134/203] added additional jar download

Signed-off-by: Deepak Devadathan
---
 .../roles/provision-oci-spark-cluster/defaults/main.yml  | 9 +++++++--
 ansible/roles/provision-oci-spark-cluster/tasks/main.yml | 8 ++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/provision-oci-spark-cluster/defaults/main.yml b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml
index 11e3e6357b..777554f8ba 100644
--- a/ansible/roles/provision-oci-spark-cluster/defaults/main.yml
+++ b/ansible/roles/provision-oci-spark-cluster/defaults/main.yml
@@ -19,9 +19,11 @@
 jedis_version: 3.2.0
 zip4j_version: 2.6.2
 guice_version: 3.0
-jets3t_version: 0.9.4
+jets3t_version: 0.9.7
 hadoop_aws_version: 2.7.3
 java_xmlbuilder_version: 1.1
+cassandra_connector_version: 3.2.0
+commons_pool_version: 2.0
 
 guava_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_version}}/guava-{{guava_version}}.jar
 guava_jre_url: https://repo1.maven.org/maven2/com/google/guava/guava/{{guava_jre_version}}/guava-{{guava_jre_version}}.jar
@@ -33,7 +35,10 @@
 zip4j_url: https://repo1.maven.org/maven2/net/lingala/zip4j/zip4j/{{zip4j_version}}/zip4j-{{zip4j_version}}.jar
 guice_url: https://repo1.maven.org/maven2/com/google/inject/guice/{{guice_version}}/guice-{{guice_version}}.jar
 guice_servlet_url: https://repo1.maven.org/maven2/com/google/inject/extensions/guice-servlet/{{guice_version}}/guice-servlet-{{guice_version}}.jar
-jets3t_url: https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar
+# jets3t_url: https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar
+jets3t_url: https://repo1.maven.org/maven2/org/jets3t/jets3t/{{jets3t_version}}/jets3t-{{jets3t_version}}.jar
 hadoop_aws_url: https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/{{hadoop_aws_version}}/hadoop-aws-{{hadoop_aws_version}}.jar
 java_xmlbuilder_url: https://repo1.maven.org/maven2/com/jamesmurty/utils/java-xmlbuilder/{{java_xmlbuilder_version}}/java-xmlbuilder-{{java_xmlbuilder_version}}.jar
+common_pool_url: "https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/{{commons_pool_version}}/commons-pool2-{{commons_pool_version}}.jar"
+spark_cassandra_connector_assembly_url: "https://repo1.maven.org/maven2/com/datastax/spark/spark-cassandra-connector-assembly_2.12/{{cassandra_connector_version}}/spark-cassandra-connector-assembly_2.12-{{cassandra_connector_version}}.jar"

diff --git a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml
index 9f766b2d65..4c2302d168 100644
--- a/ansible/roles/provision-oci-spark-cluster/tasks/main.yml
+++ b/ansible/roles/provision-oci-spark-cluster/tasks/main.yml
@@ -69,6 +69,14 @@
   become: yes
   get_url: url={{ java_xmlbuilder_url }} dest={{ spark_folder }}/jars/java-xmlbuilder-{{java_xmlbuilder_version}}.jar timeout=1000 force=no
 
+- name: Download spark_cassandra_connector and copy to Spark jars folder
+  become: yes
+  get_url: url={{ spark_cassandra_connector_assembly_url }} dest={{ spark_folder }}/jars/spark-cassandra-connector-assembly_2.12-{{cassandra_connector_version}}.jar timeout=1000 force=no
+
+- name: Download common_pool_url and copy to Spark jars folder
+  become: yes
+  get_url: url={{ common_pool_url }} dest={{ spark_folder }}/jars/commons-pool2-{{commons_pool_version}}.jar timeout=1000 force=no
+
 - name: Download config to livy
   command: hdfs dfs -get -f oci://{{ bucket }}@{{ oci_namespace }}/models-{{ model_version }}/application.conf {{ spark_folder }}/conf/application.conf
 
From 21c13278749984bffffa8991b8dbd34322e697a5 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 19 Apr 2023 16:21:33 +1000
Subject: [PATCH 135/203] corrected jinja

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2
index 4de8480bc2..5f97950498 100644
--- a/ansible/roles/data-products-deploy/templates/jets3t.j2
+++ b/ansible/roles/data-products-deploy/templates/jets3t.j2
@@ -1,8 +1,8 @@
 storage-service.request-signature-version={{ jets3t_s3_request_signature_version }}
-s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %}
 s3service.disable-dns-buckets={{ jets3t_s3_disable_dns_buckets }}
 s3service.https-only={{ jets3t_s3_https_only }}
 {% if jets3t_s3_default_bucket_location %}
 s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }}
 {% endif %}
 uploads.stream-retry-buffer-size=2147483646
+s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %}

From 223af2da8d43214c167a294aef38ef947270d403 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Wed, 19 Apr 2023 23:47:13 +1000
Subject: [PATCH 136/203] s3service.https-only to true for jets3t

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2
index 5f97950498..1ca346578c 100644
--- a/ansible/roles/data-products-deploy/templates/jets3t.j2
+++ b/ansible/roles/data-products-deploy/templates/jets3t.j2
@@ -1,6 +1,6 @@
 storage-service.request-signature-version={{ jets3t_s3_request_signature_version }}
 s3service.disable-dns-buckets={{ jets3t_s3_disable_dns_buckets }}
-s3service.https-only={{ jets3t_s3_https_only }}
+s3service.https-only=true
 {% if jets3t_s3_default_bucket_location %}
 s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }}
 {% endif %}

From 1d19f60a6d1049796acbdc1f6c1b668971ca7ef5 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Tue, 25 Apr 2023 10:01:02 +1000
Subject: [PATCH 137/203] updated report bucket name

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2
index 9ee857590a..8b2f19dad9 100644
--- a/ansible/roles/data-products-deploy/templates/common.conf.j2
+++ b/ansible/roles/data-products-deploy/templates/common.conf.j2
@@ -117,7 +117,7 @@ azure {
 }
 
 ## Reports - Global config
-cloud.container.reports="reports"
+cloud.container.reports="{{cloud_storage_privatereports_bucketname}}"
 
 # course metrics container in azure
 course.metrics.cassandra.sunbirdKeyspace="sunbird"
From 8f99438850dac173b8de5284383fa0934d5c7052 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Tue, 25 Apr 2023 10:08:22 +1000
Subject: [PATCH 138/203] updated report verification bucket

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2
index 8b2f19dad9..c9a3ba285e 100644
--- a/ansible/roles/data-products-deploy/templates/common.conf.j2
+++ b/ansible/roles/data-products-deploy/templates/common.conf.j2
@@ -263,7 +263,7 @@
 druid.report.default.storage="s3"
 {% endif %}
 druid.report.date.format="yyyy-MM-dd"
-druid.report.default.container="report-verification"
+druid.report.default.container="{{cloud_storage_report_verfication_bucketname}}"
 
 
 ## Collection Exhaust Jobs Configuration -- Start ##

From 4399d57fbba239dc68c30459c8c1fec4226e4715 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Tue, 25 Apr 2023 13:20:58 +1000
Subject: [PATCH 139/203] updated jets3t prop

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2
index 1ca346578c..09538d18a1 100644
--- a/ansible/roles/data-products-deploy/templates/jets3t.j2
+++ b/ansible/roles/data-products-deploy/templates/jets3t.j2
@@ -4,5 +4,5 @@ s3service.https-only=true
 {% if jets3t_s3_default_bucket_location %}
 s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }}
 {% endif %}
-uploads.stream-retry-buffer-size=2147483646
+uploads.stream-retry-buffer-size=268435456
 s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %}

From fe2d983d3ea1e54ed673535462d70cc8bebf4e56 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Tue, 25 Apr 2023 13:46:37 +1000
Subject: [PATCH 140/203] updated upload.stream buffer value

Signed-off-by: Deepak Devadathan
---
 ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2
index 09538d18a1..661c9c576c 100644
--- a/ansible/roles/data-products-deploy/templates/jets3t.j2
+++ b/ansible/roles/data-products-deploy/templates/jets3t.j2
@@ -4,5 +4,5 @@ s3service.https-only=true
 {% if jets3t_s3_default_bucket_location %}
 s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }}
 {% endif %}
-uploads.stream-retry-buffer-size=268435456
+uploads.stream-retry-buffer-size=131072
 s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %}
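jets3t keeps this many bytes of an upload stream buffered in memory so it can mark/reset and retry a failed transfer, which is why patches 139, 140 and later 142 keep revisiting the value. The magnitudes being traded off, as a quick sketch:

    # Sketch only: the retry-buffer values tried, in human-readable units.
    for bytes in 2147483646 268435456 131072; do
      printf '%-12s %s\n' "$bytes" "$(numfmt --to=iec "$bytes")"
    done
    # 2147483646   2.0G   (Integer.MAX_VALUE - 1)
    # 268435456    256M
    # 131072       128K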
From a0c91349ff84546f2d1aaeb154d34b54654d9454 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Tue, 25 Apr 2023 15:12:50 +1000
Subject: [PATCH 141/203] testing with added parameter to reset mark error

Signed-off-by: Deepak Devadathan
---
 .../data-products-deploy/templates/cluster-config.json.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
index 49acc70781..0c3ba9b886 100644
--- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
+++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2
@@ -81,8 +81,8 @@
     "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}",
     "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}",
     "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}",
-    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }}",
-    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }}"
+    "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m",
+    "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m"
   }
 }
 {% endif %}
\ No newline at end of file
-Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}, -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/spark3/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/odh/2.0.1/spark/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}} -Daws_storage_key={{ sunbird_private_storage_account_name }} -Daws_storage_secret={{ sunbird_private_storage_account_key }} -Dcom.amazonaws.sdk.s3.defaultStreamBufferSize=512m" } } {% endif %} \ No newline at end of file From 5c708811158a0fb8e5cb7947b8eb2f3bd5794d92 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 25 Apr 2023 15:41:51 +1000 Subject: [PATCH 142/203] changed buffer size Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/jets3t.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/jets3t.j2 b/ansible/roles/data-products-deploy/templates/jets3t.j2 index 661c9c576c..1ca346578c 100644 --- a/ansible/roles/data-products-deploy/templates/jets3t.j2 +++ b/ansible/roles/data-products-deploy/templates/jets3t.j2 @@ -4,5 +4,5 @@ s3service.https-only=true {% if jets3t_s3_default_bucket_location %} s3service.default-bucket-location={{ jets3t_s3_default_bucket_location }} {% endif %} -uploads.stream-retry-buffer-size=131072 +uploads.stream-retry-buffer-size=2147483646 s3service.s3-endpoint={% if jets3t_s3_endpoint_host %}{{ jets3t_s3_endpoint_host }}{% else %}s3-ap-south-1.amazonaws.com{% endif %} From 057b7d7f536223a422874a04efd5bf12212f7281 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 26 Apr 2023 14:12:20 +1000 Subject: [PATCH 143/203] updated model-config for druid-dataset Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/model-config.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index bb08633a47..e13f465b93 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -141,7 +141,7 @@ config() { echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": 
"Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' ;; "druid-dataset") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","format":"csv"},"output":[{"to": "console","params": {"printEvent": false}}],"parallelization":8,"appName":"ML Druid Data Model"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ML Druid Data Model"}' ;; "*") echo "Unknown model code" From fe245eada2ccf5d9218e3bac96d9fdff6bf8e24d Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 26 Apr 2023 16:01:26 +1000 Subject: [PATCH 144/203] updated the model-config params Signed-off-by: Deepak Devadathan --- .../templates/model-config.j2 | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) 
diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2 index e13f465b93..95a2bf5515 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.j2 @@ -33,21 +33,21 @@ config() { echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"model":"org.sunbird.analytics.model.report.AssessmentCorrectionModel","modelParams":{"parallelization":200,"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","granularity":"all","aggregations":[{"name":"count","type":"count","fieldName":"count"}],"dimensions":[{"fieldName":"identifier","aliasName":"identifier"}],"filters":[{"type":"equals","dimension":"contentType","value":"SelfAssess"}],"descending":"false"},"fileOutputConfig":{"to":"file","params":{"file":"{{ analytics.home }}/assessment-correction/skippedEvents"}},"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"kafka","params":{"brokerList":"'$brokerIngestionList'","topic":"'$assessTopic'"}}],"parallelization":200,"appName":"Assessment Correction Model"}' ;; "assessment-archival") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"azure","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}' ;; "assessment-archived-removal") {% if dp_object_store_type == "azure" %} echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"azure","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; {% elif (dp_object_store_type == "oci" or dp_object_store_type == "s3") %} - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"reports"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' + echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":true,"azureFetcherConfig":{"store":"{{dp_object_store_type}}","blobExt":"csv.gz","reportPath":"archived-data/","container":"{{reports_container}}"},"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Removal Job"}' ;; {% endif %} "collection-reconciliation-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CollectionReconciliationJob","modelParams":{"mode":"prodrun","brokerList":"{{ingestion_kafka_broker_host}}","topic":"{{env}}.issue.certificate.request","sparkCassandraConnectionHost":"{{ core_cassandra_host }}"},"parallelization":30,"appName":"CollectionReconciliationJob"}' ;; "collection-summary-report") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"], "contentType": "Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course"},"fields":["identifier","name","organisation","channel"],"limit":10000}},"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "score-metric-migration-job") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.ScoreMetricMigrationJob","modelParams":{"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Score Metric Migration Job"}' @@ -56,34 +56,34 @@ config() { echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.AssessmentScoreCorrectionJob","modelParams":{"assessment.score.correction.batches":"","cassandraReadConsistency":"QUORUM","cassandraWriteConsistency":"QUORUM","csvPath":"/mount/data/analytics/score_correction","isDryRunMode":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":30,"appName":"Assessment Score Correction Job"}' ;; "course-batch-status-updater") - echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.audit.CourseBatchStatusUpdaterJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkElasticsearchConnectionHost":"http://{{ single_node_es_host }}:9200","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","kpLearningBasePath":"http://{{groups['learning'][0]}}:8080/learning-service","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Course Batch Status Updater Job"}' ;; "collection-summary-report-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"druid_storage_account_key","storageSecretConfig":"druid_storage_account_secret","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJobV2","modelParams":{"storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","batchSize":50,"generateForAllBatches":true,"contentFields":["identifier","name","organisation","channel","status","keywords","createdFor","medium","subject"],"contentStatus":["Live","Unlisted","Retired"],"store":"{{dp_object_store_type}}","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","specPath":"/mount/data/analytics/scripts/collection-summary-ingestion-spec.json","druidIngestionUrl":"'$druidIngestionURL'","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report V2"}' ;; "uci-private-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageContainer":"reports","fromDate":"$(date 
--date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.uci.UCIPrivateExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UCI Private Exhaust"}' ;; "uci-response-exhaust") - echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":"","storageContainer":"reports"},"parallelization":8,"appName":"UCI Response Exhaust"}' + echo '{"search":{"type":"{{dp_object_store_type}}","queries":[{"bucket":"'$bucket'","prefix":"unique/raw/","endDate":"'$endDate'","delta":0}]},"filters":[{"name":"eid","operator":"EQ","value":"ASSESS"}],"model":"org.sunbird.analytics.uci.UCIResponseExhaust","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","botPdataId":"{{ uci_pdata_id }}","mode":"OnDemand","fromDate":"","toDate":""},"parallelization":8,"appName":"UCI Response Exhaust"}' ;; "userinfo-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageContainer":"{{reports_container}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.UserInfoExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"UserInfo Exhaust"}' ;; "program-collection-summary-report") - echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"], "keywords":"'$keyword'", "store":"{{dp_object_store_type}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.CollectionSummaryJob","modelParams":{"searchFilter":{"request":{"filters":{"status":["Live"],"contentType":"Course","keywords":["'$keyword'"]},"fields":["identifier","name","organisation","channel"],"limit":10000}},"columns":["Published by","Batch id","Collection id","Collection name","Batch start date","Batch end date","State","Total enrolments By State","Total completion By State"],"keywords":"'$keyword'","store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Collection Summary Report"}' ;; "response-exhaust") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust"}' ;; "response-exhaust-v2") - echo 
'{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ResponseExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Response Exhaust V2"}' ;; "progress-exhaust") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config", "storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}", "apiVersion":"v2", "parallelization":200,"mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust"}' ;; "progress-exhaust-v2") - echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","batchFilters":["TPD"],"searchFilter":{}, "sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host }}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}", "sparkCassandraConnectionHost":"{{ core_cassandra_host }}", "fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' + echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.collection.ProgressExhaustJobV2","modelParams":{"store":"{{dp_object_store_type}}","mode":"OnDemand","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","batchFilters":["TPD"],"searchFilter":{},"sparkElasticsearchConnectionHost":"{{ sunbird_es_host }}","sparkRedisConnectionHost":"{{ metadata2_redis_host 
}}","sparkUserDbRedisIndex":"12","sparkUserDbRedisPort":"{{ user_port }}","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Progress Exhaust V2"}' ;; "druid_reports") echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.DruidQueryProcessingModel","modelParams":{"mode":"batch"},"parallelization":8,"appName":"Druid Reports"}' @@ -98,7 +98,7 @@ config() { echo '{"jobsCount":'$jobManagerJobsCount',"topic":"'$job_topic'","bootStrapServer":"'$brokerList'","zookeeperConnect":"'$zookeeper'","consumerGroup":"jobmanager","slackChannel":"#test_channel","slackUserName":"JobManager","tempBucket":"'$bucket'","tempFolder":"'$temp_folder'"}' ;; "wfs") - echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"{{ dp_storage_key_config }}", "storageSecretConfig":"{{ dp_storage_secret_config }}", "apiVersion":"v2", "parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"} }],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' + echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"{{ dp_raw_telemetry_backup_location }}","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","store":"{{ dp_object_store_type }}","apiVersion":"v2","parallelization":200},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":200,"appName":"Workflow Summarizer","deviceMapping":true}' #echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.model.WorkflowSummary","modelParams":{"apiVersion":"v2"},"output":[{"to":"console","params":{"printEvent": false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$topic'"}}],"parallelization":8,"appName":"Workflow Summarizer","deviceMapping":true}' ;; "video-streaming") @@ -126,19 +126,19 @@ config() { echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.ETBMetricsJob","modelParams":{"reportConfig":{"id":"etb_metrics","metrics":[],"labels":{"date":"Date","identifier":"Textbook ID","name":"Textbook Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","totalQRCodes":"Total number of QR codes","contentLinkedQR":"Number of QR codes with atleast 1 linked content","withoutContentQR":"Number of QR codes with no linked content","withoutContentT1":"Term 1 QR Codes with no linked content","withoutContentT2":"Term 2 QR Codes with no linked content","status":"Textbook Status","totalContentLinked":"Total content linked","totalQRLinked":"Total QR codes linked to content","totalQRNotLinked":"Total number of QR codes with no linked content","leafNodesCount":"Total number of leaf nodes","leafNodeUnlinked":"Number of leaf nodes with no content","l1Name":"Level 1 Name","l2Name":"Level 2 Name","l3Name":"Level 3 Name","l4Name":"Level 4 Name","l5Name":"Level 5 
Name","dialcode":"QR Code","sum(scans)":"Total Scans","noOfContent":"Number of contents","nodeType":"Type of Node","term":"Term"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"dialcode_counts.csv","postContainer":"'$reportPostContainer'"}},"dialcodeReportConfig":{"id":"etb_metrics","metrics":[],"labels":{},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":1,"reportPath":"dialcode_counts.csv","rollupAge":"ACADEMIC_YEAR","rollupCol":"Date","rollupRange":10,"postContainer":"'$reportPostContainer'"}},"etbFileConfig":{"bucket":"'$reportPostContainer'","file":"dialcode_scans/dialcode_counts.csv"},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"equals","dimension":"contentType","value":"TextBook"},{"type":"in","dimension":"status","values":["Live","Draft","Review"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"tenantConfig":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ETB Metrics Model","deviceMapping":false}' ;; "course-enrollment-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseEnrollmentJob","modelParams":{"reportConfig":{"id":"tpd_metrics","metrics":[],"labels":{"completionCount":"Completion Count","status":"Status","enrollmentCount":"Enrollment Count","courseName":"Course Name","batchName":"Batch 
Name"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"DAY","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"course_enrollment.csv"}},"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]},"limit":10000}},"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Enrollment Metrics Model","deviceMapping":false}' ;; "course-consumption-report") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams": {"esConfig": {"request": {"filters": {"objectType": ["Content"],"contentType": ["Course"],"identifier": [],"status": ["Live"]}}},"reportConfig": {"id": "tpd_metrics","labels": {"date": "Date","status": "Batch Status","timespent": "Timespent in mins","courseName": "Course Name","batchName": "Batch Name"},"dateRange": {"staticInterval": "LastDay","granularity": "all"},"metrics": [{"metric": "totalCoursePlays","label": "Total Course Plays (in mins)","druidQuery": {"queryType": "groupBy","dataSource": "summary-events","intervals":"LastDay","aggregations": [{"name": "sum__edata_time_spent","type": "doubleSum","fieldName": "edata_time_spent"}],"dimensions": [{"fieldName": "object_rollup_l1","aliasName": "courseId"}, {"fieldName": "uid","aliasName": "userId"}, {"fieldName": "context_cdata_id","aliasName": "batchId"}],"filters": [{"type": "equals","dimension": "eid","value": "ME_WORKFLOW_SUMMARY"}, {"type": "in","dimension": "dimensions_pdata_id","values": ["'$producerEnv'.app", "'$producerEnv'.portal"]}, {"type": "equals","dimension": "dimensions_type","value": "content"}, {"type": "equals","dimension": "dimensions_mode","value": "play"}, {"type": "equals","dimension": "context_cdata_type","value": "batch"}],"postAggregation": [{"type": "arithmetic","name": "timespent","fields": {"leftField": "sum__edata_time_spent","rightField": 60,"rightFieldType": "constant"},"fn": "/"}],"descending": "false"}}],"output": [{"type": "csv","metrics": ["timespent"],"dims": []}],"queryType": "groupBy"},"store": "{{ dp_object_store_type }}","format":"csv","key": "druid-reports/","filePath": "druid-reports/","container":"'$bucket'","folderPrefix": ["slug", "reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "TPD Course Consumption Metrics Model","deviceMapping": false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.CourseConsumptionJob","modelParams":{"esConfig":{"request":{"filters":{"objectType":["Content"],"contentType":["Course"],"identifier":[],"status":["Live"]}}},"reportConfig":{"id":"tpd_metrics","labels":{"date":"Date","status":"Batch Status","timespent":"Timespent in mins","courseName":"Course Name","batchName":"Batch 
Name"},"dateRange":{"staticInterval":"LastDay","granularity":"all"},"metrics":[{"metric":"totalCoursePlays","label":"Total Course Plays (in mins)","druidQuery":{"queryType":"groupBy","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"sum__edata_time_spent","type":"doubleSum","fieldName":"edata_time_spent"}],"dimensions":[{"fieldName":"object_rollup_l1","aliasName":"courseId"},{"fieldName":"uid","aliasName":"userId"},{"fieldName":"context_cdata_id","aliasName":"batchId"}],"filters":[{"type":"equals","dimension":"eid","value":"ME_WORKFLOW_SUMMARY"},{"type":"in","dimension":"dimensions_pdata_id","values":["'$producerEnv'.app","'$producerEnv'.portal"]},{"type":"equals","dimension":"dimensions_type","value":"content"},{"type":"equals","dimension":"dimensions_mode","value":"play"},{"type":"equals","dimension":"context_cdata_type","value":"batch"}],"postAggregation":[{"type":"arithmetic","name":"timespent","fields":{"leftField":"sum__edata_time_spent","rightField":60,"rightFieldType":"constant"},"fn":"/"}],"descending":"false"}}],"output":[{"type":"csv","metrics":["timespent"],"dims":[]}],"queryType":"groupBy"},"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"TPD Course Consumption Metrics Model","deviceMapping":false}' ;; "textbook-progress-report") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent": "Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' + echo 
'{"search":{"type":"none"},"model":"org.ekstep.analytics.model.report.TextBookProgressModel","modelParams":{"reportConfig":{"id":"content_progress_metrics","metrics":[],"labels":{"board":"Board","medium":"Medium","gradeLevel":"Grade","subject":"Subject","resourceType":"Content Type","totalContent":"Total Contents","live":"Live","review":"Review","draft":"Draft","unlisted":"Limited Sharing","application_ecml":"Created on Diksha","video_youtube":"YouTube Content","video_mp4":"Uploaded Videos","application_pdf":"Text Content","application_html":"Uploaded Interactive Content","identifier":"Content ID","creator":"Created By","createdOn":"Creation Date","lastPublishDate":"Last Publish Date","status":"Status","pkgVersion":"Number of times Published","lastPublishedOn":"Pending in current status since","pendingInCurrentStatus":"Pending in current status since"},"output":[{"type":"csv","dims":[]}],"mergeConfig":{"frequency":"WEEK","basePath":"'$baseScriptPath'","rollup":0,"reportPath":"content_progress_metrics.csv","postContainer":"'$reportPostContainer'"}},"filter":{"tenantId":"","slugName":""},"store":"{{ dp_object_store_type }}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$bucket'","folderPrefix":["slug","reportName"],"sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Progress Metrics Model","deviceMapping":false}' ;; "audit-metrics-report") echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.model.MetricsAuditJob","modelParams":{"auditConfig":[{"name":"denorm","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"telemetry-denormalized/raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]},"filters":[{"name":"flags.user_data_retrieved","operator":"EQ","value":true},{"name":"flags.content_data_retrieved","operator":"EQ","value":true},{"name":"flags.device_data_retrieved","operator":"EQ","value":true},{"name":"flags.dialcode_data_retrieved","operator":"EQ","value":true},{"name":"flags.collection_data_retrieved","operator":"EQ","value":true},{"name":"flags.derived_location_retrieved","operator":"EQ","value":true}]},{"name":"failed","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"failed/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"unique","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"unique/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"raw/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"channel-raw","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/raw/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"channel-summary","search":{"type":"{{ dp_object_store_type }}","queries":[{"folder":true,"bucket":"'$bucket'","prefix":"channel/*/summary/","startDate":"'$endDate'","endDate":"'$endDate'*.json.gz"}]}},{"name":"derived","search":{"type":"{{ dp_object_store_type 
}}","queries":[{"bucket":"'$bucket'","prefix":"derived/wfs/","startDate":"'$endDate'","endDate":"'$endDate'"}]}},{"name":"telemetry-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"telemetry-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}},{"name":"summary-count","search":{"type":"druid","druidQuery":{"queryType":"timeSeries","dataSource":"summary-events","intervals":"LastDay","aggregations":[{"name":"total_count","type":"count","fieldName":"count"}],"descending":"false"}}}]},"output":[{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$metricsTopic'"}}],"parallelization":8,"appName":"Metrics Audit"}' ;; "sourcing-metrics") - echo '{"search": {"type": "none"},"model": "org.ekstep.analytics.sourcing.SourcingMetrics","modelParams": {"reportConfig": {"id": "textbook_report","metrics": [],"labels": {"date": "Date","primaryCategory":"Collection Category","identifier": "Collection ID","name": "Collection Name","medium": "Medium","gradeLevel": "Grade","subject": "Subject","createdOn": "Created On","lastUpdatedOn": "Last Updated On","reportDate": "Report generation date","board": "Board","grade": "Grade","chapters": "Folder Name","totalChapters": "Total number of first level folders","status": "Textbook Status"},"output": [{"type": "csv","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}, {"type": "json","dims": ["identifier", "channel", "name"],"fileParameters": ["id", "dims"]}]},"druidConfig": {"queryType": "groupBy","dataSource": "content-model-snapshot","intervals": "1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations": [{"name": "count","type": "count"}],"dimensions": [{"fieldName": "channel","aliasName": "channel"}, {"fieldName": "identifier","aliasName": "identifier","type": "Extraction","outputType": "STRING","extractionFn": [{"type": "javascript","fn": "function(str){return str == null ? 
null: str.split(\".\")[0]}"}]}, {"fieldName": "name","aliasName": "name"}, {"fieldName": "createdFor","aliasName": "createdFor"}, {"fieldName": "createdOn","aliasName": "createdOn"}, {"fieldName": "lastUpdatedOn","aliasName": "lastUpdatedOn"}, {"fieldName": "board","aliasName": "board"}, {"fieldName": "medium","aliasName": "medium"}, {"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName": "gradeLevel","aliasName": "gradeLevel"}, {"fieldName": "subject","aliasName": "subject"}, {"fieldName": "status","aliasName": "status"}],"filters": [{"type": "in","dimension": "primaryCategory","values": ["Digital Textbook", "Course", "Content Playlist","Question paper","Question Paper"]}, {"type": "in","dimension": "status","values": ["Live"]}],"postAggregation": [],"descending": "false","limitSpec": {"type": "default","limit": 1000000,"columns": [{"dimension": "count","direction": "descending"}]}},"store": "{{ dp_object_store_type }}","storageContainer": "'$reportPostContainer'","format": "csv","key": "druid-reports/","filePath": "druid-reports/","container": "'$reportPostContainer'","sparkCassandraConnectionHost": "'$sunbirdPlatformCassandraHost'","folderPrefix": ["slug", "reportName"]},"output": [{"to": "console","params": {"printEvent": false}}],"parallelization": 8,"appName": "Textbook Report Job","deviceMapping": false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.sourcing.SourcingMetrics","modelParams":{"reportConfig":{"id":"textbook_report","metrics":[],"labels":{"date":"Date","primaryCategory":"Collection Category","identifier":"Collection ID","name":"Collection Name","medium":"Medium","gradeLevel":"Grade","subject":"Subject","createdOn":"Created On","lastUpdatedOn":"Last Updated On","reportDate":"Report generation date","board":"Board","grade":"Grade","chapters":"Folder Name","totalChapters":"Total number of first level folders","status":"Textbook Status"},"output":[{"type":"csv","dims":["identifier","channel","name"],"fileParameters":["id","dims"]},{"type":"json","dims":["identifier","channel","name"],"fileParameters":["id","dims"]}]},"druidConfig":{"queryType":"groupBy","dataSource":"content-model-snapshot","intervals":"1901-01-01T00:00:00+00:00/2101-01-01T00:00:00+00:00","aggregations":[{"name":"count","type":"count"}],"dimensions":[{"fieldName":"channel","aliasName":"channel"},{"fieldName":"identifier","aliasName":"identifier","type":"Extraction","outputType":"STRING","extractionFn":[{"type":"javascript","fn":"function(str){return str == null ? 
null: str.split(\".\")[0]}"}]},{"fieldName":"name","aliasName":"name"},{"fieldName":"createdFor","aliasName":"createdFor"},{"fieldName":"createdOn","aliasName":"createdOn"},{"fieldName":"lastUpdatedOn","aliasName":"lastUpdatedOn"},{"fieldName":"board","aliasName":"board"},{"fieldName":"medium","aliasName":"medium"},{"fieldName":"primaryCategory","aliasName":"primaryCategory"},{"fieldName":"gradeLevel","aliasName":"gradeLevel"},{"fieldName":"subject","aliasName":"subject"},{"fieldName":"status","aliasName":"status"}],"filters":[{"type":"in","dimension":"primaryCategory","values":["Digital Textbook","Course","Content Playlist","Question paper","Question Paper"]},{"type":"in","dimension":"status","values":["Live"]}],"postAggregation":[],"descending":"false","limitSpec":{"type":"default","limit":1000000,"columns":[{"dimension":"count","direction":"descending"}]}},"store":"{{ dp_object_store_type }}","storageContainer":"'$reportPostContainer'","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv","key":"druid-reports/","filePath":"druid-reports/","container":"'$reportPostContainer'","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","folderPrefix":["slug","reportName"]},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Textbook Report Job","deviceMapping":false}' ;; "druid-dataset") echo '{"search":{"type":"none"},"model":"org.sunbird.analytics.exhaust.OnDemandDruidExhaustJob","modelParams":{"store":"{{ dp_object_store_type }}","container":"'$reportPostContainer'","key":"ml_reports/","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","format":"csv"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"ML Druid Data Model"}' From 0166b61609bfb360e74e349fe742af21f387e7e5 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 26 Apr 2023 16:03:24 +1000 Subject: [PATCH 145/203] updated to replace the temp dir when run on spark bds cluster Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 1db6a6cc3b..383b42110b 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -136,6 +136,17 @@ line: 'spark_output_temp_dir="/var/log/sparkapp/tmp/"' tags: - framework-spark-cluster + when: dp_object_store_type != "oci" + +- name: Update spark temp dir value for cluster + lineinfile: + path: '{{ analytics.home }}/models-{{ model_version }}/application.conf' + regexp: '^spark_output_temp_dir="/mount/data/analytics/tmp/"' + line: 'spark_output_temp_dir="/var/log/spark/"' + tags: + - framework-spark-cluster + when: dp_object_store_type == "oci" + + - name: Update logger kafka config for cluster lineinfile: From c4ca689ca7d18effcf7dda5dd3ac085dd3445ac6 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 26 Apr 2023 22:02:44 +1000 Subject: [PATCH 146/203] updated variable for analytics db name Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2
b/ansible/roles/data-products-deploy/templates/common.conf.j2 index c9a3ba285e..488bb9486d 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -236,7 +236,7 @@ druid.ingestion.path="/druid/indexer/v1/task" druid.segment.path="/druid/coordinator/v1/metadata/datasources/" druid.deletesegment.path="/druid/coordinator/v1/datasources/" -postgres.druid.db="{{ druid_report_postgres_db_name }}" +postgres.druid.db="{{ spark_postgres_db_name }}" postgres.druid.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" postgres.druid.user="{{ druid_report_postgres_db_username }}" postgres.druid.pass="{{ dp_vault_druid_postgress_pass }}" From 678b01d4e1ae326288084e6af50733e069d82258 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 27 Apr 2023 13:45:23 +1000 Subject: [PATCH 147/203] added the property fs.s3.buffer.dir Signed-off-by: Deepak Devadathan --- .../roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 index ce84928ce1..161b5f3ecf 100644 --- a/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/oci-bds-spark-cluster/templates/create-cluster.sh.j2 @@ -146,6 +146,7 @@ function update_bds_config(){ add_properties "fs.oci.client.auth.tenantId" $tenid add_properties "fs.oci.client.auth.userId" $usid add_properties "fs.oci.client.regionCodeOrId" $region + add_properties "fs.s3.buffer.dir" /tmp #Update it to ambari echo "updating ambari config" update_ambari_config From d5956565b3237d50f5b60f2a7704d18b7c3c2491 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 27 Apr 2023 14:09:12 +1000 Subject: [PATCH 148/203] updated the pg db name variable Signed-off-by: Deepak Devadathan --- .../data-products-deploy/templates/update-job-requests.py.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 index 4085041965..c8358b668d 100644 --- a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 +++ b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 @@ -92,7 +92,7 @@ def get_columns_names(db,table): def main(batchSize, jobId,jobType,table): host="{{postgres.db_url}}" port={{postgres.db_port}} - user="{{postgres.db_username}}" + user="{{spark_pg_db_name}}" password="{{postgres.db_password}}" database="{{postgres.spark_db_name}}" url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database) From 26838b52ffc13299494a7b5376742456e6af4e61 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 27 Apr 2023 14:13:25 +1000 Subject: [PATCH 149/203] added default value Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/defaults/main.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/defaults/main.yml b/ansible/roles/data-products-deploy/defaults/main.yml index fec9fc0816..06cdf1b0f2 100755 --- a/ansible/roles/data-products-deploy/defaults/main.yml +++ b/ansible/roles/data-products-deploy/defaults/main.yml @@ -288,4 +288,7 @@ jets3t_s3_request_signature_version: "{{ s3_request_signature_version }}" jets3t_s3_endpoint_host: "{% if s3_storage_endpoint %}{{ s3_storage_endpoint |
regex_replace('^[a-z]+://(.*)$', '\\1') }}{% endif %}" jets3t_s3_disable_dns_buckets: "{{ s3_path_style_access }}" jets3t_s3_https_only: "{{ s3_https_only }}" -jets3t_s3_default_bucket_location: "{{ s3_default_bucket_location }}" \ No newline at end of file +jets3t_s3_default_bucket_location: "{{ s3_default_bucket_location }}" + + +spark_pg_db_name: "analytics" \ No newline at end of file From 1f144b7fa4774f1c61ae484e1a58ae50f73895a6 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 27 Apr 2023 14:22:58 +1000 Subject: [PATCH 150/203] updated the python variable Signed-off-by: Deepak Devadathan --- .../data-products-deploy/templates/update-job-requests.py.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 index c8358b668d..cfd986b008 100644 --- a/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 +++ b/ansible/roles/data-products-deploy/templates/update-job-requests.py.j2 @@ -92,9 +92,9 @@ def get_columns_names(db,table): def main(batchSize, jobId,jobType,table): host="{{postgres.db_url}}" port={{postgres.db_port}} - user="{{spark_pg_db_name}}" + user="{{postgres.db_username}}" password="{{postgres.db_password}}" - database="{{postgres.spark_db_name}}" + database="{{postgres.db_name}}" url_connect = "jdbc:postgresql://{0}:{1}/{2}".format(host, port, database) db = psycopg2.connect(host=host, user=user, password=password, database=database, port=port) From ed19234f633cdeaea32c18088a801d769640cdab Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 2 May 2023 19:54:00 +1000 Subject: [PATCH 151/203] testing pg ssl mode connection Signed-off-by: Deepak Devadathan --- .../main/scala/org/sunbird/dp/core/util/PostgresConnect.scala | 4 +++- .../src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index 8d3ae14b83..a4d168c44c 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -5,7 +5,7 @@ import java.sql.{Connection, ResultSet, SQLException} import org.postgresql.ds.PGPoolingDataSource import org.postgresql.ssl.NonValidatingFactory -final case class PostgresConnectionConfig(user: String, password: String, database: String, host: String, port: Int, maxConnections: Int) +final case class PostgresConnectionConfig(user: String, password: String, database: String, host: String, port: Int, maxConnections: Int, sslMode: Int) class PostgresConnect(config: PostgresConnectionConfig) { @@ -27,9 +27,11 @@ class PostgresConnect(config: PostgresConnectionConfig) { source.setPassword(config.password) source.setDatabaseName(config.database) source.setMaxConnections(config.maxConnections) + if (config.sslMode) { source.setProperty("ssl", "true") source.setProperty("sslmode", "require") source.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory") + } } @throws[Exception] diff --git a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala index fdbfe5ddf3..f45286570b 100644 --- 
a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala +++ b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala @@ -17,7 +17,8 @@ class PostgresConnectSpec extends BaseSpec with Matchers with MockitoSugar { database = "postgres", host = config.getString("postgres.host"), port = config.getInt("postgres.port"), - maxConnections = config.getInt("postgres.maxConnections") + maxConnections = config.getInt("postgres.maxConnections"), + sslMode= config.getOrElse("sslmode", false).asInstanceOf[Boolean]; ) val postgresConnect = new PostgresConnect(postgresConfig) val connection = postgresConnect.getConnection From d1341c6e4fd4752216d8647700a22c4e371d12aa Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 2 May 2023 20:04:17 +1000 Subject: [PATCH 152/203] removed ssl and added test data Signed-off-by: Deepak Devadathan --- .../main/scala/org/sunbird/dp/core/util/PostgresConnect.scala | 4 +--- data-pipeline-flink/dp-core/src/test/resources/base-test.conf | 2 ++ .../src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index a4d168c44c..8d3ae14b83 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -5,7 +5,7 @@ import java.sql.{Connection, ResultSet, SQLException} import org.postgresql.ds.PGPoolingDataSource import org.postgresql.ssl.NonValidatingFactory -final case class PostgresConnectionConfig(user: String, password: String, database: String, host: String, port: Int, maxConnections: Int, sslMode: Int) +final case class PostgresConnectionConfig(user: String, password: String, database: String, host: String, port: Int, maxConnections: Int) class PostgresConnect(config: PostgresConnectionConfig) { @@ -27,11 +27,9 @@ class PostgresConnect(config: PostgresConnectionConfig) { source.setPassword(config.password) source.setDatabaseName(config.database) source.setMaxConnections(config.maxConnections) - if (config.sslMode) { source.setProperty("ssl", "true") source.setProperty("sslmode", "require") source.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory") - } } @throws[Exception] diff --git a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf index 61c37e5494..3e0780a9b3 100644 --- a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf +++ b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf @@ -49,6 +49,8 @@ postgres { maxConnections = 2 user = "postgres" password = "postgres" + database = "postgres" + } lms-cassandra { diff --git a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala index f45286570b..fdbfe5ddf3 100644 --- a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala +++ b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala @@ -17,8 +17,7 @@ class PostgresConnectSpec extends BaseSpec with Matchers with MockitoSugar { database = "postgres", host = config.getString("postgres.host"), port = config.getInt("postgres.port"), - 
maxConnections = config.getInt("postgres.maxConnections"), - sslMode= config.getOrElse("sslmode", false).asInstanceOf[Boolean]; + maxConnections = config.getInt("postgres.maxConnections") ) val postgresConnect = new PostgresConnect(postgresConfig) val connection = postgresConnect.getConnection From 2b637920ff9c56adcd95483ab50a5052abc13c66 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 2 May 2023 20:15:03 +1000 Subject: [PATCH 153/203] removed database from test data Signed-off-by: Deepak Devadathan --- data-pipeline-flink/dp-core/src/test/resources/base-test.conf | 1 - 1 file changed, 1 deletion(-) diff --git a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf index 3e0780a9b3..e1829a8c34 100644 --- a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf +++ b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf @@ -49,7 +49,6 @@ postgres { maxConnections = 2 user = "postgres" password = "postgres" - database = "postgres" } From f69fd408fce76c577cef1947e4272cec6bb7f525 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 2 May 2023 21:28:25 +1000 Subject: [PATCH 154/203] added ssl config for pg ssl Signed-off-by: Deepak Devadathan --- .../org/sunbird/dp/core/util/PostgresConnect.scala | 10 ++++++---- .../dp-core/src/test/resources/base-test.conf | 1 + .../scala/org/sunbird/spec/PostgresConnectSpec.scala | 6 ++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index 8d3ae14b83..08e8bc9dc0 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -5,7 +5,7 @@ import java.sql.{Connection, ResultSet, SQLException} import org.postgresql.ds.PGPoolingDataSource import org.postgresql.ssl.NonValidatingFactory -final case class PostgresConnectionConfig(user: String, password: String, database: String, host: String, port: Int, maxConnections: Int) +final case class PostgresConnectionConfig(user: String, password: String, database: String, host: String, port: Int, maxConnections: Int,sslMode: Boolean = true) class PostgresConnect(config: PostgresConnectionConfig) { @@ -27,9 +27,11 @@ class PostgresConnect(config: PostgresConnectionConfig) { source.setPassword(config.password) source.setDatabaseName(config.database) source.setMaxConnections(config.maxConnections) - source.setProperty("ssl", "true") - source.setProperty("sslmode", "require") - source.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory") + if (config.sslMode) { + source.setProperty("ssl", "true") + source.setProperty("sslmode", "require") + source.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory") + } } @throws[Exception] diff --git a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf index e1829a8c34..53f9ef028f 100644 --- a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf +++ b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf @@ -49,6 +49,7 @@ postgres { maxConnections = 2 user = "postgres" password = "postgres" + sslMode = false } diff --git a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala 
b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala
index fdbfe5ddf3..a95b9892c2 100644
--- a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala
+++ b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala
@@ -17,7 +17,8 @@ class PostgresConnectSpec extends BaseSpec with Matchers with MockitoSugar {
     database = "postgres",
     host = config.getString("postgres.host"),
     port = config.getInt("postgres.port"),
-    maxConnections = config.getInt("postgres.maxConnections")
+    maxConnections = config.getInt("postgres.maxConnections"),
+    sslMode = config.getBoolean("postgres.sslMode")
   )
   val postgresConnect = new PostgresConnect(postgresConfig)
   val connection = postgresConnect.getConnection
@@ -46,7 +47,8 @@ class PostgresConnectSpec extends BaseSpec with Matchers with MockitoSugar {
     database = "postgres",
     host = config.getString("postgres.host"),
     port = config.getInt("postgres.port"),
-    maxConnections = config.getInt("postgres.maxConnections")
+    maxConnections = config.getInt("postgres.maxConnections"),
+    sslMode = config.getBoolean("postgres.sslMode")
   )
   val postgresConnect = new PostgresConnect(postgresConfig)
   val connection = postgresConnect.getConnection

From 596869e1cdafdf13a5809eb4a22dbc8323ed1cc8 Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Tue, 2 May 2023 22:03:19 +1000
Subject: [PATCH 155/203] testing with sslmode true

Signed-off-by: Deepak Devadathan
---
 data-pipeline-flink/dp-core/src/test/resources/base-test.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf
index 53f9ef028f..17ef923099 100644
--- a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf
+++ b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf
@@ -49,7 +49,7 @@ postgres {
   maxConnections = 2
   user = "postgres"
   password = "postgres"
-  sslMode = false
+  sslMode = true
 }
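Note on the SSL-flag iterations above: the first attempt in PATCH 151 could not compile as written, for three separate reasons. The case class declared `sslMode: Int` but tested it with `if (config.sslMode)` (Scala does not coerce Int to Boolean), Typesafe Config's `Config` has no `getOrElse` method, and the stray semicolon at the end of the constructor argument list is a syntax error. PATCH 154 settles on a `Boolean` field read with `config.getBoolean("postgres.sslMode")`, which works once the key is declared in base-test.conf. Where the key may be absent, a `hasPath` guard supplies the default; a minimal sketch using only the standard Typesafe Config API (the `optionalBoolean` helper is illustrative, not part of this codebase):

    import com.typesafe.config.{Config, ConfigFactory}

    object PgSslConfigReader {
      // Typesafe Config has no getOrElse; guard with hasPath to fall back
      // to a default when the key is missing from the conf file entirely.
      def optionalBoolean(config: Config, path: String, default: Boolean): Boolean =
        if (config.hasPath(path)) config.getBoolean(path) else default

      def main(args: Array[String]): Unit = {
        val config = ConfigFactory.load()
        // Behaves like PATCH 154 when postgres.sslMode is present,
        // and like the pre-SSL code when it is not.
        val sslMode = optionalBoolean(config, "postgres.sslMode", default = false)
        println(s"postgres ssl enabled: $sslMode")
      }
    }

With such a guard the test configuration would not need to hard-code the key at all, though declaring it explicitly, as these patches do, keeps the fixture self-documenting.

From 5a379ba7d3aabec36dc71e609434cb88a95cc42c Mon Sep 17 00:00:00 2001
From: subhash_chandra_budde
Date: Tue, 2 May 2023 20:02:40 +0530
Subject: [PATCH 156/203] Updated the storage config for admin reports

---
 ansible/roles/data-products-deploy/templates/model-config.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ansible/roles/data-products-deploy/templates/model-config.j2 b/ansible/roles/data-products-deploy/templates/model-config.j2
index 95a2bf5515..f9991e1714 100644
--- a/ansible/roles/data-products-deploy/templates/model-config.j2
+++ b/ansible/roles/data-products-deploy/templates/model-config.j2
@@ -105,10 +105,10 @@ config() {
     echo '{"search":{"type":"{{ dp_object_store_type }}"},"model":"org.ekstep.analytics.job.VideoStreamingJob","modelParams":{"maxIterations":10},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Video Streaming Job","deviceMapping":false}'
     ;;
     "admin-user-reports")
-    echo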
'{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminReportJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin User Reports","deviceMapping":false}' ;; "admin-geo-reports") - echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' + echo '{"search":{"type":"none"},"model":"org.ekstep.analytics.job.report.StateAdminGeoReportJob","modelParams":{"store":"{{dp_object_store_type}}","storageKeyConfig":"storage.key.config","storageSecretConfig":"storage.secret.config","storageContainer":"{{reports_container}}","storageEndpoint":"{{dp_storage_endpoint_config}}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')","sparkCassandraConnectionHost":"'$sunbirdPlatformCassandraHost'","sparkElasticsearchConnectionHost":"'$sunbirdPlatformElasticsearchHost'"},"output":[{"to":"console","params":{"printEvent":false}}],"parallelization":8,"appName":"Admin Geo Reports","deviceMapping":false}' ;; "telemetry-replay") echo '{"search":{"type":"{{ dp_object_store_type }}","queries":[{"bucket":"'$bucket'","prefix":"'$inputBucket'","endDate":"'$endDate'","delta":0}]},"model":"org.ekstep.analytics.job.EventsReplayJob","modelParams":{},"output":[{"to":"console","params":{"printEvent":false}},{"to":"kafka","params":{"brokerList":"'$brokerList'","topic":"'$sinkTopic'"}}],"parallelization":8,"appName":"TelemetryReplayJob","deviceMapping":false}' From 4f9883f1c297c5c13525c491854e79f8772491cf Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 4 May 2023 10:47:03 +1000 Subject: [PATCH 157/203] added templates for ingestion spec for collection and sourcing Signed-off-by: Deepak Devadathan --- .../roles/data-products-deploy/tasks/main.yml | 4 +- .../collection-summary-ingestion-spec.j2 | 256 ++++++++++++++++++ .../templates/sourcing-ingestion-spec.j2 | 151 +++++++++++ 3 files changed, 409 insertions(+), 2 deletions(-) create mode 100644 ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 create mode 100644 ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2 diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index 383b42110b..fafb9daa3a 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -340,7 +340,7 @@ - spark-jobs - name: Copy collection-summary ingestion spec - copy: src="collection-summary-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + copy: 
src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} tags: - ed-dataproducts @@ -369,7 +369,7 @@ - spark-jobs - name: Copy sourcing-summary ingestion spec - copy: src="sourcing-ingestion-spec.json" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + copy: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} tags: - ed-dataproducts diff --git a/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 b/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 new file mode 100644 index 0000000000..f26c2e6447 --- /dev/null +++ b/ansible/roles/data-products-deploy/templates/collection-summary-ingestion-spec.j2 @@ -0,0 +1,256 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "collection-summary-snapshot", + "parser": { + "type": "string", + "parseSpec": { + "format": "json", + "flattenSpec": { + "useFieldDiscovery": false, + "fields": [ + { + "type": "root", + "name": "content_org", + "expr": "contentorg" + }, + { + "type": "root", + "name": "user_org", + "expr": "orgname" + }, + { + "type": "root", + "name": "batch_start_date", + "expr": "startdate" + }, + { + "type": "root", + "name": "batch_end_date", + "expr": "enddate" + }, + { + "type": "root", + "name": "has_certificate", + "expr": "hascertified" + }, + { + "type": "root", + "name": "collection_id", + "expr": "courseid" + }, + { + "type": "root", + "name": "batch_id", + "expr": "batchid" + }, + { + "type": "root", + "name": "collection_name", + "expr": "collectionname" + }, + { + "type": "root", + "name": "batch_name", + "expr": "batchname" + }, + { + "type": "root", + "name": "total_enrolment", + "expr": "enrolleduserscount" + }, + { + "type": "root", + "name": "total_completion", + "expr": "completionuserscount" + }, + { + "type": "root", + "name": "total_certificates_issued", + "expr": "certificateissuedcount" + }, + { + "type": "root", + "name": "content_status", + "expr": "contentstatus" + }, + { + "type": "root", + "name": "user_state", + "expr": "state" + }, + { + "type": "root", + "name": "user_district", + "expr": "district" + }, + { + "type": "root", + "name": "content_channel", + "expr": "channel" + }, + { + "type": "root", + "name": "keywords", + "expr": "keywords" + }, + { + "type": "root", + "name": "timestamp", + "expr": "timestamp" + }, + { + "type": "root", + "name": "medium", + "expr": "medium" + }, + { + "type": "root", + "name": "subject", + "expr": "subject" + }, + { + "type": "root", + "name": "created_for", + "expr": "createdfor" + }, + { + "type": "root", + "name": "user_type", + "expr": "usertype" + }, + { + "type": "root", + "name": "user_subtype", + "expr": "usersubtype" + } + ] + }, + "dimensionsSpec": { + "dimensions": [ + { + "name": "content_org" + }, + { + "name": "user_org" + }, + { + "type": "string", + "name": "batch_id" + }, + { + "type": "string", + "name": "batch_start_date" + }, + { + "type": "string", + "name": "batch_end_date" + }, + { + "type": "string", + "name": "collection_id" + }, + { + "type": "string", + "name": "collection_name" + }, + { + "type": "string", + "name": "batch_name" + }, + { + "type": "long", + "name": "total_enrolment" + }, + { + "type": "long", + "name": "total_completion" + }, + { + "type": "long", + "name": "total_certificates_issued" + }, + { + "type": "string", + "name": "content_status" 
+ }, + { + "type": "string", + "name": "user_state" + }, + { + "type": "string", + "name": "user_district" + }, + { + "name": "keywords" + }, + { + "name": "has_certificate" + }, + { + "type": "string", + "name": "content_channel" + }, + { + "name": "medium" + }, + { + "name": "subject" + }, + { + "name": "created_for" + }, + { + "type": "string", + "name": "user_type" + }, + { + "type": "string", + "name": "user_subtype" + } + ], + "dimensionsExclusions": [] + }, + "timestampSpec": { + "column": "timestamp", + "format": "auto" + } + } + }, + "metricsSpec": [], + "granularitySpec": { + "type": "uniform", + "segmentGranularity": "day", + "queryGranularity": "none", + "rollup": true + } + }, + "ioConfig": { + "type": "index", + "firehose": { +{% if dp_object_store_type == "azure" %} + "type": "static-azure-blobstore", + "blobs": [ + { + "container": "{{reports_container}}", + "path": "/collection-summary-reports-v2/collection-summary-report-latest.json" + } + ], +{% elif (dp_object_store_type == "oci") %} + "type": "static-s3", + "uris": [ "s3://{{reports_container}}/sourcing/SourcingSummaryReport.json"], +{% endif %} + "fetchTimeout": 300000 + } + }, + "tuningConfig": { + "type": "index", + "targetPartitionSize": 5000000, + "maxRowsInMemory": 25000, + "forceExtendableShardSpecs": false, + "logParseExceptions": true + } + } +} \ No newline at end of file diff --git a/ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2 b/ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2 new file mode 100644 index 0000000000..41bb51afba --- /dev/null +++ b/ansible/roles/data-products-deploy/templates/sourcing-ingestion-spec.j2 @@ -0,0 +1,151 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "sourcing-summary-snapshot", + "parser": { + "type": "string", + "parseSpec": { + "format": "json", + "flattenSpec": { + "useFieldDiscovery": false, + "fields": [ + { + "type": "root", + "name": "program_id", + "expr": "program_id" + }, + { + "type": "root", + "name": "status", + "expr": "status" + }, + { + "type": "root", + "name": "rootorg_id", + "expr": "rootorg_id" + }, + { + "type": "root", + "name": "user_id", + "expr": "user_id" + }, + { + "type": "root", + "name": "osid", + "expr": "osid" + }, + { + "type": "root", + "name": "user_type", + "expr": "user_type" + }, + { + "type": "root", + "name": "contributor_id", + "expr": "contributor_id" + }, + { + "type": "root", + "name": "total_contributed_content", + "expr": "total_contributed_content" + }, + { + "type": "root", + "name": "primary_category", + "expr": "primary_category" + }, + { + "type": "root", + "name": "created_by", + "expr": "created_by" + } + ] + }, + "dimensionsSpec": { + "dimensions": [ + { + "type": "string", + "name": "program_id" + }, + { + "type": "string", + "name": "status" + }, + { + "type": "string", + "name": "rootorg_id" + }, + { + "type": "string", + "name": "user_id" + }, + { + "type": "string", + "name": "osid" + }, + { + "type": "string", + "name": "user_type" + }, + { + "type": "string", + "name": "contributor_id" + }, + { + "type": "string", + "name": "primary_category" + }, + { + "type": "string", + "name": "created_by" + } + ], + "dimensionsExclusions": [] + }, + "timestampSpec": { + "column": "timestamp", + "format": "auto" + } + } + }, + "metricsSpec": [ + { + "name": "total_count", + "type": "count" + } + ], + "granularitySpec": { + "type": "uniform", + "segmentGranularity": "day", + "queryGranularity": "none", + "rollup": true + } + }, + "ioConfig": { + "type": 
"index", + "firehose": { +{% if dp_object_store_type == "azure" %} + "type": "static-azure-blobstore", + "blobs": [ + { + "container": "{{reports_container}}", + "path": "/sourcing/SourcingSummaryReport.json" + } + ], +{% elif (dp_object_store_type == "oci") %} + "type": "static-s3", + "uris": [ "s3://{{reports_container}}/sourcing/SourcingSummaryReport.json"], +{% endif %} + "fetchTimeout": 300000 + } + }, + "tuningConfig": { + "type": "index", + "targetPartitionSize": 5000000, + "maxRowsInMemory": 25000, + "forceExtendableShardSpecs": false, + "logParseExceptions": true + } + } +} From 64a720e2ffce61d254ccb43f42de218e0dccd026 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 4 May 2023 10:52:36 +1000 Subject: [PATCH 158/203] used template command instead of copy Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index fafb9daa3a..c9a4caa3e2 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -340,7 +340,7 @@ - spark-jobs - name: Copy collection-summary ingestion spec - copy: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + template: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} tags: - ed-dataproducts @@ -369,7 +369,7 @@ - spark-jobs - name: Copy sourcing-summary ingestion spec - copy: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + template: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} tags: - ed-dataproducts From 039499c321ee75a98b8f68bf929498e5df679220 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 4 May 2023 10:54:26 +1000 Subject: [PATCH 159/203] copying as json file Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/tasks/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/data-products-deploy/tasks/main.yml b/ansible/roles/data-products-deploy/tasks/main.yml index c9a4caa3e2..1c15e14023 100644 --- a/ansible/roles/data-products-deploy/tasks/main.yml +++ b/ansible/roles/data-products-deploy/tasks/main.yml @@ -340,7 +340,7 @@ - spark-jobs - name: Copy collection-summary ingestion spec - template: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + template: src="collection-summary-ingestion-spec.j2" dest={{ analytics.home }}/scripts/collection-summary-ingestion-spec.json mode=755 owner={{ analytics_user }} group={{ analytics_group }} tags: - ed-dataproducts @@ -369,7 +369,7 @@ - spark-jobs - name: Copy sourcing-summary ingestion spec - template: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/ mode=755 owner={{ analytics_user }} group={{ analytics_group }} + template: src="sourcing-ingestion-spec.j2" dest={{ analytics.home }}/scripts/sourcing-ingestion-spec.json mode=755 owner={{ analytics_user }} group={{ analytics_group }} tags: - ed-dataproducts From 896410ce1b654293ad5cefef0ac4ad747ce6cd89 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 4 May 
2023 22:05:52 +1000 Subject: [PATCH 160/203] updated model-config.json.j2 Signed-off-by: Deepak Devadathan --- .../templates/model-config.json.j2 | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/model-config.json.j2 b/ansible/roles/data-products-deploy/templates/model-config.json.j2 index a3569c7f46..55b89f7845 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.json.j2 @@ -20,7 +20,12 @@ "model": "org.ekstep.analytics.model.WorkflowSummary", "modelParams": { "apiVersion": "v2", - "parallelization": 32 + "parallelization": 32, + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", + "store":"{{ dp_object_store_type }}" }, "output": [ { @@ -347,6 +352,10 @@ } }, "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "format":"csv", "key": "druid-reports/", "filePath": "druid-reports/", @@ -458,6 +467,10 @@ "queryType": "groupBy" }, "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "format":"csv", "key": "druid-reports/", "filePath": "druid-reports/", From 9418fb9d3c46204440806b227467bf6b67b8a928 Mon Sep 17 00:00:00 2001 From: subhash_chandra_budde Date: Fri, 5 May 2023 00:23:13 +0530 Subject: [PATCH 161/203] Updated model params --- .../templates/model-config.json.j2 | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ansible/roles/data-products-deploy/templates/model-config.json.j2 b/ansible/roles/data-products-deploy/templates/model-config.json.j2 index 55b89f7845..67cbd6a3cd 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.json.j2 @@ -73,6 +73,11 @@ }, "model": "org.ekstep.analytics.job.report.StateAdminReportJob", "modelParams": { + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", + "store":"{{ dp_object_store_type }}", "sparkCassandraConnectionHost": "{{core_cassandra_host}}", "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" }, @@ -94,6 +99,11 @@ }, "model": "org.ekstep.analytics.job.report.StateAdminGeoReportJob", "modelParams": { + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", + "store":"{{ dp_object_store_type }}", "sparkCassandraConnectionHost": "{{core_cassandra_host}}", "sparkElasticsearchConnectionHost": "{{sunbird_es_host}}" }, @@ -144,6 +154,11 @@ "pushMetrics": true, "brokerList": "{{ brokerlist }}", "topic": "{{ analytics_metrics_topic }}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", + "store":"{{ dp_object_store_type }}", "model": [ { "model": "WorkFlowSummaryModel", @@ -303,6 
+318,10 @@ "slugName": "" }, "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "format": "csv", "key": "druid-reports/", "filePath": "druid-reports/", From 8458fa9d0ca9666a1e35dd4db7d4f09a6581ae52 Mon Sep 17 00:00:00 2001 From: subhash_chandra_budde Date: Fri, 5 May 2023 00:41:23 +0530 Subject: [PATCH 162/203] Updated storage config --- .../templates/model-config.json.j2 | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ansible/roles/data-products-deploy/templates/model-config.json.j2 b/ansible/roles/data-products-deploy/templates/model-config.json.j2 index 67cbd6a3cd..bb170b2193 100644 --- a/ansible/roles/data-products-deploy/templates/model-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/model-config.json.j2 @@ -125,6 +125,11 @@ }, "model": "org.ekstep.analytics.updater.UpdateContentRating", "modelParams": { + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "startDate": "$(date --date yesterday '+%Y-%m-%d')", "endDate": "$(date '+%Y-%m-%d')" }, @@ -221,6 +226,11 @@ }, "model": "org.ekstep.analytics.model.ExperimentDefinitionModel", "modelParams": { + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "sparkElasticsearchConnectionHost": "{{ lp_composite_search_host }}" }, "output": [ @@ -514,6 +524,11 @@ }, "model": "org.ekstep.analytics.model.MetricsAuditJob", "modelParams": { + "store": "{{dp_object_store_type}}", + "storageKeyConfig":"storage.key.config", + "storageSecretConfig":"storage.secret.config", + "storageContainer":"{{reports_container}}", + "storageEndpoint":"{{dp_storage_endpoint_config}}", "auditConfig": [ { "name": "denorm", From e16a8264b2591a9b05e71ed7697f8a6726077213 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 5 May 2023 21:13:05 +1000 Subject: [PATCH 163/203] updated the right db name for druid rollup Signed-off-by: Deepak Devadathan --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index 488bb9486d..c9a3ba285e 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -236,7 +236,7 @@ druid.ingestion.path="/druid/indexer/v1/task" druid.segment.path="/druid/coordinator/v1/metadata/datasources/" druid.deletesegment.path="/druid/coordinator/v1/datasources/" -postgres.druid.db="{{ spark_postgres_db_name }}" +postgres.druid.db="{{ druid_report_postgres_db_name }}" postgres.druid.url="jdbc:postgresql://{{postgres.db_url}}:{{postgres.db_port}}/" postgres.druid.user="{{ druid_report_postgres_db_username }}" postgres.druid.pass="{{ dp_vault_druid_postgress_pass }}" From 86d5bbdf2ea0d30190407179289566f17bf2a426 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 15 May 2023 09:45:15 +1000 Subject: [PATCH 164/203] testing with pgssl mode false Signed-off-by: Deepak Devadathan --- 
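Because `sslMode` is the last parameter of `PostgresConnectionConfig` and this patch flips its default from `true` to `false`, every call site that omits the argument keeps compiling and now gets plain-TCP behaviour, while SSL deployments opt in by name. A small sketch of the pattern (`PgConf` is a hypothetical stand-in for the real case class):

    // Hypothetical, trimmed stand-in for PostgresConnectionConfig.
    final case class PgConf(host: String, port: Int, sslMode: Boolean = false)

    object PgConfDemo {
      def main(args: Array[String]): Unit = {
        val plain  = PgConf("localhost", 5432)                  // default: sslMode = false
        val secure = PgConf("localhost", 5432, sslMode = true)  // explicit opt-in
        println(s"plain=$plain secure=$secure")
      }
    }

Defaulting to `false` also matches the test environment, since this patch sets `sslMode = false` in base-test.conf as well, so only deployments that actually terminate SSL need to override it.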
.../main/scala/org/sunbird/dp/core/util/PostgresConnect.scala | 2 +- data-pipeline-flink/dp-core/src/test/resources/base-test.conf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index 08e8bc9dc0..392094cf5e 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -5,7 +5,7 @@ import java.sql.{Connection, ResultSet, SQLException} import org.postgresql.ds.PGPoolingDataSource import org.postgresql.ssl.NonValidatingFactory -final case class PostgresConnectionConfig(user: String, password: String, database: String, host: String, port: Int, maxConnections: Int,sslMode: Boolean = true) +final case class PostgresConnectionConfig(user: String, password: String, database: String, host: String, port: Int, maxConnections: Int,sslMode: Boolean = false) class PostgresConnect(config: PostgresConnectionConfig) { diff --git a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf index 17ef923099..53f9ef028f 100644 --- a/data-pipeline-flink/dp-core/src/test/resources/base-test.conf +++ b/data-pipeline-flink/dp-core/src/test/resources/base-test.conf @@ -49,7 +49,7 @@ postgres { maxConnections = 2 user = "postgres" password = "postgres" - sslMode = true + sslMode = false } From 5963a78e33d598deabf7cee9382d5f84d30c63d0 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 15 May 2023 10:28:51 +1000 Subject: [PATCH 165/203] added debug lines Signed-off-by: Deepak Devadathan --- .../functions/DeviceProfileUpdaterFunction.scala | 1 + .../dp/deviceprofile/task/DeviceProfileUpdaterConfig.scala | 1 + .../main/scala/org/sunbird/dp/core/util/PostgresConnect.scala | 3 +++ .../src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala | 4 ++-- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/functions/DeviceProfileUpdaterFunction.scala b/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/functions/DeviceProfileUpdaterFunction.scala index c5a9ac8fa7..801112282f 100644 --- a/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/functions/DeviceProfileUpdaterFunction.scala +++ b/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/functions/DeviceProfileUpdaterFunction.scala @@ -52,6 +52,7 @@ class DeviceProfileUpdaterFunction(config: DeviceProfileUpdaterConfig, host = config.postgresHost, port = config.postgresPort, maxConnections = config.postgresMaxConnections + sslMode = config.postgresSslMode )) } } diff --git a/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/task/DeviceProfileUpdaterConfig.scala b/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/task/DeviceProfileUpdaterConfig.scala index 68c32fa873..df84b5aeb6 100644 --- a/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/task/DeviceProfileUpdaterConfig.scala +++ b/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/task/DeviceProfileUpdaterConfig.scala @@ -35,6 +35,7 @@ class 
DeviceProfileUpdaterConfig(override val config: Config) extends BaseJobCon val postgresHost: String = config.getString("postgres.host") val postgresPort: Int = config.getInt("postgres.port") val postgresMaxConnections: Int = config.getInt("postgres.maxConnections") + val postgresSslMode: Boolean = config.getBoolean("postgres.sslMode") val countryCode = "country_code" diff --git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index 392094cf5e..b7940a88ba 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -47,7 +47,10 @@ class PostgresConnect(config: PostgresConnectionConfig) { @throws[Exception] def closeConnection(): Unit = { + println("****Closing connection****") connection.close() + println("****Closed connection****") + println("****Closing source****") source.close() } diff --git a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala index a95b9892c2..f0560a9499 100644 --- a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala +++ b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala @@ -23,10 +23,10 @@ class PostgresConnectSpec extends BaseSpec with Matchers with MockitoSugar { val postgresConnect = new PostgresConnect(postgresConfig) val connection = postgresConnect.getConnection connection should not be (null) - + println("****Got valid connection****") postgresConnect.execute("CREATE TABLE device_table(id text PRIMARY KEY, channel text);") postgresConnect.execute("INSERT INTO device_table(id,channel) VALUES('12345','custchannel');") - + println("****Query executed****") val st = connection.createStatement val rs = postgresConnect.executeQuery("SELECT * FROM device_table where id='12345';") while ( { From 1785ad9b0c58abb63ed2c35ee5ce7c342c188738 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 15 May 2023 10:40:17 +1000 Subject: [PATCH 166/203] closing source on non null Signed-off-by: Deepak Devadathan --- .../main/scala/org/sunbird/dp/core/util/PostgresConnect.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index b7940a88ba..c5c1036795 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -51,7 +51,9 @@ class PostgresConnect(config: PostgresConnectionConfig) { connection.close() println("****Closed connection****") println("****Closing source****") - source.close() + if (source != null) { + source.close() + } } @throws[Exception] From 9e7973d11f246c399845e6562c2ba1f526dfb677 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 15 May 2023 10:53:39 +1000 Subject: [PATCH 167/203] overriding connection source close for testing Signed-off-by: Deepak Devadathan --- .../scala/org/sunbird/dp/core/util/PostgresConnect.scala | 9 +++------ .../scala/org/sunbird/spec/PostgresConnectSpec.scala | 2 -- 2 files changed, 3 insertions(+), 8 deletions(-) diff 
--git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala
index c5c1036795..fcbaa4af5d 100644
--- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala
+++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala
@@ -47,13 +47,10 @@ class PostgresConnect(config: PostgresConnectionConfig) {
 
   @throws[Exception]
   def closeConnection(): Unit = {
-    println("****Closing connection****")
     connection.close()
-    println("****Closed connection****")
-    println("****Closing source****")
-    if (source != null) {
-      source.close()
-    }
+    // if (source != null) {
+    //   source.close()
+    // }
   }
 
   @throws[Exception]
diff --git a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala
index f0560a9499..5d22ffed7a 100644
--- a/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala
+++ b/data-pipeline-flink/dp-core/src/test/scala/org/sunbird/spec/PostgresConnectSpec.scala
@@ -23,10 +23,8 @@ class PostgresConnectSpec extends BaseSpec with Matchers with MockitoSugar {
   val postgresConnect = new PostgresConnect(postgresConfig)
   val connection = postgresConnect.getConnection
   connection should not be (null)
-  println("****Got valid connection****")
   postgresConnect.execute("CREATE TABLE device_table(id text PRIMARY KEY, channel text);")
   postgresConnect.execute("INSERT INTO device_table(id,channel) VALUES('12345','custchannel');")
-  println("****Query executed****")
   val st = connection.createStatement
   val rs = postgresConnect.executeQuery("SELECT * FROM device_table where id='12345';")
   while ( {

From 5548d42b911ebd465c3c9224c006e6c25f1ab91d Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 15 May 2023 11:24:12 +1000
Subject: [PATCH 168/203] corrected the missing comma

Signed-off-by: Deepak Devadathan
---
 .../deviceprofile/functions/DeviceProfileUpdaterFunction.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/functions/DeviceProfileUpdaterFunction.scala b/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/functions/DeviceProfileUpdaterFunction.scala
index 801112282f..9a3bcea499 100644
--- a/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/functions/DeviceProfileUpdaterFunction.scala
+++ b/data-pipeline-flink/device-profile-updater/src/main/scala/org/sunbird/dp/deviceprofile/functions/DeviceProfileUpdaterFunction.scala
@@ -51,7 +51,7 @@ class DeviceProfileUpdaterFunction(config: DeviceProfileUpdaterConfig,
       database = config.postgresDb,
       host = config.postgresHost,
       port = config.postgresPort,
-      maxConnections = config.postgresMaxConnections
+      maxConnections = config.postgresMaxConnections,
       sslMode = config.postgresSslMode
     ))
   }
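The two patches that follow experiment with `Option`-based guards around `source.close()`. For reference, `Option(x)` is `Some(x)` exactly when `x` is non-null, so `Option(source).isDefined` means "safe to close" (PATCH 169), while `Option(source).isEmpty` is true precisely when the reference is null, so guarding `close()` with it (PATCH 170) inverts the intended check. A null-safe close needs no conditional at all; a sketch using only types the class already imports (`closeQuietly` is illustrative, not the committed code):

    import java.sql.Connection
    import org.postgresql.ds.PGPoolingDataSource

    object SafeClose {
      // Option(ref) is Some only for a non-null ref, so foreach invokes
      // close() exactly on the resources that actually exist.
      def closeQuietly(connection: Connection, source: PGPoolingDataSource): Unit = {
        Option(connection).foreach(_.close())
        Option(source).foreach(_.close())
      }
    }

From 0e37e584b6983a6fc85b31a2fc876b0b8259573e Mon Sep 17 00:00:00 2001
From: Deepak Devadathan
Date: Mon, 15 May 2023 12:01:47 +1000
Subject: [PATCH 169/203] testing non null source before closing

Signed-off-by: Deepak Devadathan
---
 .../scala/org/sunbird/dp/core/util/PostgresConnect.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff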
b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index fcbaa4af5d..bcb8db2b32 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -48,9 +48,9 @@ class PostgresConnect(config: PostgresConnectionConfig) { @throws[Exception] def closeConnection(): Unit = { connection.close() - // if (source != null) { - // source.close() - // } + if (Option(source).isDefined) { + source.close() + } } @throws[Exception] From 81181d004a7d55318947d222b96406f4e8f59369 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Mon, 15 May 2023 12:28:57 +1000 Subject: [PATCH 170/203] checking with isempty Signed-off-by: Deepak Devadathan --- .../main/scala/org/sunbird/dp/core/util/PostgresConnect.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala index bcb8db2b32..3c8393a651 100644 --- a/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala +++ b/data-pipeline-flink/dp-core/src/main/scala/org/sunbird/dp/core/util/PostgresConnect.scala @@ -48,7 +48,7 @@ class PostgresConnect(config: PostgresConnectionConfig) { @throws[Exception] def closeConnection(): Unit = { connection.close() - if (Option(source).isDefined) { + if (Option(source).isEmpty) { source.close() } } From 2083d15a74a13e8dc1d2b3374159ab77f717b9a9 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 17 May 2023 16:00:41 +0530 Subject: [PATCH 171/203] #0 feat: Testcase bug fixes --- .../test/scala/org/sunbird/dp/fixture/EventFixture.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/fixture/EventFixture.scala b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/fixture/EventFixture.scala index a5e048e865..e1d220b1fb 100644 --- a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/fixture/EventFixture.scala +++ b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/fixture/EventFixture.scala @@ -24,19 +24,19 @@ object EventFixture { //Recompute aggregate event val RECOMPUTE_ASSESS_EVENT = """{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_2128415652377067521125","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","contentId":"do_212686723743318016173"}""" - + val courseLeafNodes_1 = Map("do_2128415652377067521125:do_2128415652377067521125:leafnodes" -> "do_212686723743318016173") val courseLeafNodes_2 = Map("do_312712196780204032110117:do_312712196780204032110117:leafnodes" -> "505c7c48ac6dc1edc9b08f21db5a571d") val courseLeafNodes_3 = Map("do_2128410273679114241112:do_2128410273679114241112:leafnodes" -> "do_2128373396098744321673") val courseLeafNodes_4 = Map("do_3129323995959541761169:do_3129323995959541761169:leafnodes" -> "do_3129323935897108481169") val courseLeafNodes_5 = Map("do_873264782364827482:do_873264782364827482:leafnodes" -> "do_313026415363981312122") - val courseLeafNodes_6 = Map("do_87326478236482748244:do_87326478236482748244:leafnodes" -> "do_1131998128479272961991") + val courseLeafNodes_6 = Map("do_87326478236482748244:do_87326478236482748244:leafnodes" -> "do_113762457691021312168") val leafNodesList = 
List(courseLeafNodes_1, courseLeafNodes_2, courseLeafNodes_3, courseLeafNodes_4, courseLeafNodes_6) val DUPLICATE_BATCH_ASSESS_EVENTS_1 = """{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_873264782364827482","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_313026415363981312122","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 
3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" - val DUPLICATE_BATCH_ASSESS_EVENTS_2 = """{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_873264782364827482","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_11307972307046400011917","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_1","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of 
WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_2","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_3","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" + val DUPLICATE_BATCH_ASSESS_EVENTS_2 = 
"""{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_873264782364827482","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_113762457691021312168","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_1","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_2","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_3","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 
3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" val DUPLICATE_BATCH_ASSESS_EVENTS_3 = """{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_873264782364827482","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_313026415363981312122123","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of 
WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" - val DUPLICATE_BATCH_ASSESS_EVENTS_4 = 
"""{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_87326478236482748244","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_1131998128479272961991","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 
3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" + val DUPLICATE_BATCH_ASSESS_EVENTS_4 = """{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_87326478236482748244","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_113762457691021312168","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of 
WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" val contentCacheData_1 = Map("do_313026415363981312122" -> "{\n \"ownershipType\": [\n \"createdBy\"\n ],\n \"copyright\": \"EKSTEP\",\n \"previewUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/ecml/do_313026415363981312122-latest\",\n \"plugins\": [\n {\n \"identifier\": \"org.ekstep.stage\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.questionset\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.navigation\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit\",\n 
\"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.reorder\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.sequence\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionset.quiz\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.iterator\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.keyboard\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.ftb\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.mtf\",\n \"semanticVersion\": \"1.2\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.mcq\",\n \"semanticVersion\": \"1.3\"\n },\n {\n \"identifier\": \"org.ekstep.summary\",\n \"semanticVersion\": \"1.0\"\n }\n ],\n \"subject\": [\n \"CBSE Training\"\n ],\n \"channel\": \"0123221758376673287017\",\n \"downloadUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/ecar_files/do_313026415363981312122/vjjjj_1590142788385_do_313026415363981312122_1.0.ecar\",\n \"questions\": [\n {\n \"name\": \"Arrange the Given sentence \\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026448412631040111\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"Test data mark is 10\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026416457089024115\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"Arrange the given words in proper sentence\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026446885158912120\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"'The tree ____ is ____ and ____ mark is 3\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026423737737216117\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"Match the following\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_31302641810772787218\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n }\n ],\n \"organisation\": [\n \"EKSTEP\"\n ],\n \"language\": [\n \"English\"\n ],\n \"mimeType\": \"application/vnd.ekstep.ecml-archive\",\n \"variants\": {\n \"spine\": {\n \"ecarUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/ecar_files/do_313026415363981312122/vjjjj_1590142788484_do_313026415363981312122_1.0_spine.ecar\",\n \"size\": 44094\n }\n },\n \"editorState\": 
\"{\\\"plugin\\\":{\\\"noOfExtPlugins\\\":11,\\\"extPlugins\\\":[{\\\"plugin\\\":\\\"org.ekstep.contenteditorfunctions\\\",\\\"version\\\":\\\"1.2\\\"},{\\\"plugin\\\":\\\"org.ekstep.keyboardshortcuts\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.richtext\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.iterator\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.navigation\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.reviewercomments\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.mtf\\\",\\\"version\\\":\\\"1.2\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.mcq\\\",\\\"version\\\":\\\"1.3\\\"},{\\\"plugin\\\":\\\"org.ekstep.keyboard\\\",\\\"version\\\":\\\"1.1\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.reorder\\\",\\\"version\\\":\\\"1.1\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.sequence\\\",\\\"version\\\":\\\"1.1\\\"}]},\\\"stage\\\":{\\\"noOfStages\\\":1,\\\"currentStage\\\":\\\"371a0b37-e8ab-4e8e-b83e-3775a2ed927d\\\",\\\"selectedPluginObject\\\":\\\"49469445-2ca2-479d-95b7-360aa07ee3bd\\\"},\\\"sidebar\\\":{\\\"selectedMenu\\\":\\\"settings\\\"}}\",\n \"objectType\": \"Content\",\n \"gradeLevel\": [\n \"Class 1\"\n ],\n \"appIcon\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/do_313026415363981312122/artifact/10_1560927487910.thumb.jpg\",\n \"primaryCategory\": \"Course Assessment\",\n \"collections\": [\n {\n \"name\": \"Copy of SelfServiceable\",\n \"relation\": \"hasSequenceMember\",\n \"identifier\": \"do_313026450799894528150\",\n \"description\": \"Execution\",\n \"objectType\": \"Collection\",\n \"status\": \"Retired\"\n }\n ],\n \"appId\": \"prod.diksha.portal\",\n \"contentEncoding\": \"gzip\",\n \"artifactUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/do_313026415363981312122/artifact/1590142788151_do_313026415363981312122.zip\",\n \"lockKey\": \"0708a132-4774-4126-b2e5-df0e83f6dd40\",\n \"sYS_INTERNAL_LAST_UPDATED_ON\": \"2020-05-22T10:19:50.208+0000\",\n \"contentType\": \"SelfAssess\",\n \"lastUpdatedBy\": \"2dded955-5656-4dc2-a683-e7e42082aa8c\",\n \"identifier\": \"do_313026415363981312122\",\n \"audience\": [\n \"Teacher\"\n ],\n \"visibility\": \"Default\",\n \"consumerId\": \"e85bcfb5-a8c2-4e65-87a2-0ebb43b45f01\",\n \"mediaType\": \"content\",\n \"osId\": \"org.ekstep.quiz.app\",\n \"lastPublishedBy\": \"5c627fa2-a7a4-490f-b9c8-bc0f42559562\",\n \"languageCode\": [\n \"en\"\n ],\n \"version\": 2,\n \"license\": \"CC BY 4.0\",\n \"prevState\": \"Review\",\n \"size\": 547518,\n \"lastPublishedOn\": \"2020-05-22T10:19:48.378+0000\",\n \"name\": \"Vjjjj\",\n \"status\": \"Retired\",\n \"totalQuestions\": 2,\n \"code\": \"org.sunbird.lfSsbq\",\n \"prevStatus\": \"Processing\",\n \"description\": \"Enter description for Assessment\",\n \"streamingUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/ecml/do_313026415363981312122-latest\",\n \"medium\": [\n \"English\"\n ],\n \"posterImage\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/do_3127871179775098881294/artifact/10_1560927487910.jpg\",\n \"idealScreenSize\": \"normal\",\n \"createdOn\": \"2020-05-22T09:09:54.237+0000\",\n \"copyrightYear\": 2020,\n \"contentDisposition\": \"inline\",\n \"licenseterms\": \"By creating any type of content (resources, books, courses etc.) on DIKSHA, you consent to publish it under the Creative Commons License Framework. 
Please choose the applicable creative commons license you wish to apply to your content.\",\n \"lastUpdatedOn\": \"2020-05-22T11:59:41.479+0000\",\n \"dialcodeRequired\": \"No\",\n \"lastStatusChangedOn\": \"2020-05-22T11:59:41.479+0000\",\n \"createdFor\": [\n \"0123221758376673287017\"\n ],\n \"creator\": \"content_creator sahu\",\n \"os\": [\n \"All\"\n ],\n \"totalScore\": 5,\n \"pkgVersion\": 1,\n \"versionKey\": \"1590142785805\",\n \"idealScreenDensity\": \"hdpi\",\n \"framework\": \"ekstep_ncert_k-12\",\n \"s3Key\": \"ecar_files/do_313026415363981312122/vjjjj_1590142788385_do_313026415363981312122_1.0.ecar\",\n \"lastSubmittedOn\": \"2020-05-22T10:19:07.606+0000\",\n \"createdBy\": \"2dded955-5656-4dc2-a683-e7e42082aa8c\",\n \"compatibilityLevel\": 2,\n \"board\": \"CBSE\"\n }")
 val contentCacheData_2 = Map("do_313026415363981312122123" -> "{\"totalQuestions\":1}")
 val contentCacheList = List(contentCacheData_1, contentCacheData_2)

From 69197786246a8073381923081f9e8e1f3cb671c2 Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Wed, 17 May 2023 16:50:44 +0530
Subject: [PATCH 172/203] #0 feat: Testcase bug fixes

---
 .../assessment-aggregator/pom.xml             |  6 ++++++
 .../src/test/resources/forcevalidate.conf     |  2 +-
 .../src/test/resources/test.conf              |  2 +-
 .../org/sunbird/dp/fixture/EventFixture.scala | 11 +++++-----
 .../AssessmentAggregatorTaskTestSpec.scala    | 20 +++++++++++++++++++
 5 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/data-pipeline-flink/assessment-aggregator/pom.xml b/data-pipeline-flink/assessment-aggregator/pom.xml
index 2c4ca6f665..6b7a921a70 100644
--- a/data-pipeline-flink/assessment-aggregator/pom.xml
+++ b/data-pipeline-flink/assessment-aggregator/pom.xml
@@ -56,6 +56,12 @@
             <scope>test</scope>
             <classifier>tests</classifier>
         </dependency>
+        <dependency>
+            <groupId>com.squareup.okhttp3</groupId>
+            <artifactId>mockwebserver</artifactId>
+            <version>4.4.0</version>
+            <scope>test</scope>
+        </dependency>
         <dependency>
             <groupId>it.ozimov</groupId>
             <artifactId>embedded-redis</artifactId>
diff --git a/data-pipeline-flink/assessment-aggregator/src/test/resources/forcevalidate.conf b/data-pipeline-flink/assessment-aggregator/src/test/resources/forcevalidate.conf
index 290d24809a..5f8ac8138f 100644
--- a/data-pipeline-flink/assessment-aggregator/src/test/resources/forcevalidate.conf
+++ b/data-pipeline-flink/assessment-aggregator/src/test/resources/forcevalidate.conf
@@ -36,5 +36,5 @@ redis {
 }
 
 assessment.skip.missingRecords = true
-content.read.api = "http://dev.sunbirded.org/api/content/v1/read/"
+content.read.api = "http://127.0.0.1:3000/api/content/v1/read/"
 user.activity.agg.type="attempt_metrics"
\ No newline at end of file
diff --git a/data-pipeline-flink/assessment-aggregator/src/test/resources/test.conf b/data-pipeline-flink/assessment-aggregator/src/test/resources/test.conf
index 8e1b8bb5d1..fbe8e16690 100644
--- a/data-pipeline-flink/assessment-aggregator/src/test/resources/test.conf
+++ b/data-pipeline-flink/assessment-aggregator/src/test/resources/test.conf
@@ -35,5 +35,5 @@ redis {
 }
 }
 assessment.skip.missingRecords = false
-content.read.api = "http://dev.sunbirded.org/api/content/v1/read/"
+content.read.api = "http://127.0.0.1:3000/api/content/v1/read/"
 user.activity.agg.type="attempt_metrics"
\ No newline at end of file
diff --git a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/fixture/EventFixture.scala b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/fixture/EventFixture.scala
index e1d220b1fb..40bba34aee 100644
--- a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/fixture/EventFixture.scala
+++ b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/fixture/EventFixture.scala
b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/fixture/EventFixture.scala @@ -1,3 +1,4 @@ + package org.sunbird.dp.fixture object EventFixture { @@ -30,14 +31,14 @@ object EventFixture { val courseLeafNodes_3 = Map("do_2128410273679114241112:do_2128410273679114241112:leafnodes" -> "do_2128373396098744321673") val courseLeafNodes_4 = Map("do_3129323995959541761169:do_3129323995959541761169:leafnodes" -> "do_3129323935897108481169") val courseLeafNodes_5 = Map("do_873264782364827482:do_873264782364827482:leafnodes" -> "do_313026415363981312122") - val courseLeafNodes_6 = Map("do_87326478236482748244:do_87326478236482748244:leafnodes" -> "do_113762457691021312168") + val courseLeafNodes_6 = Map("do_87326478236482748244:do_87326478236482748244:leafnodes" -> "do_1131998128479272961991") val leafNodesList = List(courseLeafNodes_1, courseLeafNodes_2, courseLeafNodes_3, courseLeafNodes_4, courseLeafNodes_6) val DUPLICATE_BATCH_ASSESS_EVENTS_1 = """{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_873264782364827482","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_313026415363981312122","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell 
Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 
3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" - val DUPLICATE_BATCH_ASSESS_EVENTS_2 = """{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_873264782364827482","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_113762457691021312168","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_1","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of 
WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_2","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_3","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_113762457691021312168","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" + val DUPLICATE_BATCH_ASSESS_EVENTS_2 = 
"""{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_873264782364827482","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_11307972307046400011917","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_1","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_2","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776_3","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 
3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_11307972307046400011917","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" val DUPLICATE_BATCH_ASSESS_EVENTS_3 = """{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_873264782364827482","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_313026415363981312122123","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of 
WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" - val DUPLICATE_BATCH_ASSESS_EVENTS_4 = 
"""{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_87326478236482748244","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_113762457691021312168","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 
3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" - val contentCacheData_1 = Map("do_313026415363981312122" -> "{\n \"ownershipType\": [\n \"createdBy\"\n ],\n \"copyright\": \"EKSTEP\",\n \"previewUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/ecml/do_313026415363981312122-latest\",\n \"plugins\": [\n {\n \"identifier\": \"org.ekstep.stage\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.questionset\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.navigation\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.reorder\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.sequence\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionset.quiz\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.iterator\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.keyboard\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.ftb\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.mtf\",\n \"semanticVersion\": \"1.2\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.mcq\",\n \"semanticVersion\": \"1.3\"\n },\n {\n \"identifier\": \"org.ekstep.summary\",\n \"semanticVersion\": \"1.0\"\n }\n ],\n \"subject\": [\n \"CBSE Training\"\n ],\n \"channel\": \"0123221758376673287017\",\n \"downloadUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/ecar_files/do_313026415363981312122/vjjjj_1590142788385_do_313026415363981312122_1.0.ecar\",\n \"questions\": [\n {\n \"name\": \"Arrange the Given sentence \\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026448412631040111\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n 
\"status\": \"Live\"\n },\n {\n \"name\": \"Test data mark is 10\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026416457089024115\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"Arrange the given words in proper sentence\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026446885158912120\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"'The tree ____ is ____ and ____ mark is 3\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026423737737216117\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"Match the following\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_31302641810772787218\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n }\n ],\n \"organisation\": [\n \"EKSTEP\"\n ],\n \"language\": [\n \"English\"\n ],\n \"mimeType\": \"application/vnd.ekstep.ecml-archive\",\n \"variants\": {\n \"spine\": {\n \"ecarUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/ecar_files/do_313026415363981312122/vjjjj_1590142788484_do_313026415363981312122_1.0_spine.ecar\",\n \"size\": 44094\n }\n },\n \"editorState\": \"{\\\"plugin\\\":{\\\"noOfExtPlugins\\\":11,\\\"extPlugins\\\":[{\\\"plugin\\\":\\\"org.ekstep.contenteditorfunctions\\\",\\\"version\\\":\\\"1.2\\\"},{\\\"plugin\\\":\\\"org.ekstep.keyboardshortcuts\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.richtext\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.iterator\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.navigation\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.reviewercomments\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.mtf\\\",\\\"version\\\":\\\"1.2\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.mcq\\\",\\\"version\\\":\\\"1.3\\\"},{\\\"plugin\\\":\\\"org.ekstep.keyboard\\\",\\\"version\\\":\\\"1.1\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.reorder\\\",\\\"version\\\":\\\"1.1\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.sequence\\\",\\\"version\\\":\\\"1.1\\\"}]},\\\"stage\\\":{\\\"noOfStages\\\":1,\\\"currentStage\\\":\\\"371a0b37-e8ab-4e8e-b83e-3775a2ed927d\\\",\\\"selectedPluginObject\\\":\\\"49469445-2ca2-479d-95b7-360aa07ee3bd\\\"},\\\"sidebar\\\":{\\\"selectedMenu\\\":\\\"settings\\\"}}\",\n \"objectType\": \"Content\",\n \"gradeLevel\": [\n \"Class 1\"\n ],\n \"appIcon\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/do_313026415363981312122/artifact/10_1560927487910.thumb.jpg\",\n \"primaryCategory\": \"Course Assessment\",\n \"collections\": [\n {\n \"name\": \"Copy of SelfServiceable\",\n \"relation\": \"hasSequenceMember\",\n \"identifier\": \"do_313026450799894528150\",\n \"description\": \"Execution\",\n \"objectType\": \"Collection\",\n \"status\": \"Retired\"\n }\n ],\n \"appId\": \"prod.diksha.portal\",\n \"contentEncoding\": \"gzip\",\n \"artifactUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/do_313026415363981312122/artifact/1590142788151_do_313026415363981312122.zip\",\n \"lockKey\": \"0708a132-4774-4126-b2e5-df0e83f6dd40\",\n \"sYS_INTERNAL_LAST_UPDATED_ON\": \"2020-05-22T10:19:50.208+0000\",\n \"contentType\": \"SelfAssess\",\n \"lastUpdatedBy\": \"2dded955-5656-4dc2-a683-e7e42082aa8c\",\n \"identifier\": 
\"do_313026415363981312122\",\n \"audience\": [\n \"Teacher\"\n ],\n \"visibility\": \"Default\",\n \"consumerId\": \"e85bcfb5-a8c2-4e65-87a2-0ebb43b45f01\",\n \"mediaType\": \"content\",\n \"osId\": \"org.ekstep.quiz.app\",\n \"lastPublishedBy\": \"5c627fa2-a7a4-490f-b9c8-bc0f42559562\",\n \"languageCode\": [\n \"en\"\n ],\n \"version\": 2,\n \"license\": \"CC BY 4.0\",\n \"prevState\": \"Review\",\n \"size\": 547518,\n \"lastPublishedOn\": \"2020-05-22T10:19:48.378+0000\",\n \"name\": \"Vjjjj\",\n \"status\": \"Retired\",\n \"totalQuestions\": 2,\n \"code\": \"org.sunbird.lfSsbq\",\n \"prevStatus\": \"Processing\",\n \"description\": \"Enter description for Assessment\",\n \"streamingUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/ecml/do_313026415363981312122-latest\",\n \"medium\": [\n \"English\"\n ],\n \"posterImage\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/do_3127871179775098881294/artifact/10_1560927487910.jpg\",\n \"idealScreenSize\": \"normal\",\n \"createdOn\": \"2020-05-22T09:09:54.237+0000\",\n \"copyrightYear\": 2020,\n \"contentDisposition\": \"inline\",\n \"licenseterms\": \"By creating any type of content (resources, books, courses etc.) on DIKSHA, you consent to publish it under the Creative Commons License Framework. Please choose the applicable creative commons license you wish to apply to your content.\",\n \"lastUpdatedOn\": \"2020-05-22T11:59:41.479+0000\",\n \"dialcodeRequired\": \"No\",\n \"lastStatusChangedOn\": \"2020-05-22T11:59:41.479+0000\",\n \"createdFor\": [\n \"0123221758376673287017\"\n ],\n \"creator\": \"content_creator sahu\",\n \"os\": [\n \"All\"\n ],\n \"totalScore\": 5,\n \"pkgVersion\": 1,\n \"versionKey\": \"1590142785805\",\n \"idealScreenDensity\": \"hdpi\",\n \"framework\": \"ekstep_ncert_k-12\",\n \"s3Key\": \"ecar_files/do_313026415363981312122/vjjjj_1590142788385_do_313026415363981312122_1.0.ecar\",\n \"lastSubmittedOn\": \"2020-05-22T10:19:07.606+0000\",\n \"createdBy\": \"2dded955-5656-4dc2-a683-e7e42082aa8c\",\n \"compatibilityLevel\": 2,\n \"board\": \"CBSE\"\n }") + val DUPLICATE_BATCH_ASSESS_EVENTS_4 = """{"assessmentTs":1568891729576,"batchId":"012846671379595264119","courseId":"do_87326478236482748244","userId":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","attemptId":"8cd87e24df268ad09a8b0060c0a40271","contentId":"do_1131998128479272961991","events":[{"eid":"ASSESS","ets":1568891735461,"ver":"3.1","mid":"ASSESS:db00a858fec1b8796c62f224874c7edf","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The 
above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[],"duration":2}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1568891747395,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health 
Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1568891772964,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}},{"eid":"ASSESS","ets":1568891738245,"ver":"3.1","mid":"ASSESS:135815023ec32a430632ba5d7f84fe18","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"No","score":0,"resvalues":[{"2":"{\"text\":\"Work Heavy 
Organization\\n\"}"}],"duration":4}},{"eid":"ASSESS","ets":1626595233000,"ver":"3.1","mid":"ASSESS:6ba5953669ea86e8f85759d3e7f5998b","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"801ae93c-8807-4be5-8853-dd49362d8776","maxscore":1,"type":"mcq","exlength":0,"params":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"},{"2":"{\"text\":\"Work Heavy Organization\\n\"}"},{"3":"{\"text\":\"Work hell Organization\\n\"}"},{"4":"{\"text\":\"None of The above\\n\"}"},{"answer":"{\"correct\":[\"1\"]}"}],"uri":"","title":"What is the Full form of WHO..?\n","mmc":[],"mc":[],"desc":""},"index":1,"pass":"Yes","score":1,"resvalues":[{"1":"{\"text\":\"World Health Organizaton\\n\"}"}],"duration":14}},{"eid":"ASSESS","ets":1629273633000,"ver":"3.1","mid":"ASSESS:018f01bf99288474860b630b513b9d0c","actor":{"id":"ff1c4bdf-27e2-49bc-a53f-6e304bb3a87f","type":"User"},"context":{"channel":"0124784842112040965","pdata":{"id":"staging.diksha.portal","ver":"2.4.0","pid":"sunbird-portal.contentplayer"},"env":"contentplayer","sid":"wqmQpaYc9mRD6jdU6NOWuBTEyGMPXFEe","did":"a08946e8b72abfeeff6642f245d470cb","cdata":[{"id":"do_2128415652377067521127","type":"course"},{"type":"batch","id":"012846671379595264119"},{"id":"f3ec2acf4360e93172b9234e29e38be4","type":"ContentSession"}],"rollup":{"l1":"0124784842112040965"}},"object":{"id":"do_313026415363981312122","type":"Content","ver":"1","rollup":{"l1":"do_2128415652377067521127","l2":"do_2128415660716359681128"}},"tags":["0124784842112040965"],"edata":{"item":{"id":"2bc922e7-985e-486a-ae23-4ba9a1c67edc","maxscore":1,"type":"mtf","exlength":0,"params":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"},{"answer":"{\"lhs\":[\"1\",\"2\",\"3\"],\"rhs\":[\"3\",\"1\",\"2\"]}"}],"uri":"","title":"MTF 3\n","mmc":[],"mc":[],"desc":""},"index":2,"pass":"No","score":0.33,"resvalues":[{"lhs":"[{\"1\":\"{\\\"text\\\":\\\"1\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"3\\\"}\"}]"},{"rhs":"[{\"1\":\"{\\\"text\\\":\\\"3\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"1\\\"}\"}]"}],"duration":24}}]}""" + val contentCacheData_1 = Map("do_313026415363981312122" -> "{\n \"ownershipType\": [\n \"createdBy\"\n ],\n \"copyright\": \"EKSTEP\",\n \"previewUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/ecml/do_313026415363981312122-latest\",\n \"plugins\": [\n {\n \"identifier\": \"org.ekstep.stage\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.questionset\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.navigation\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit\",\n 
\"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.reorder\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.sequence\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionset.quiz\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.iterator\",\n \"semanticVersion\": \"1.0\"\n },\n {\n \"identifier\": \"org.ekstep.keyboard\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.ftb\",\n \"semanticVersion\": \"1.1\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.mtf\",\n \"semanticVersion\": \"1.2\"\n },\n {\n \"identifier\": \"org.ekstep.questionunit.mcq\",\n \"semanticVersion\": \"1.3\"\n },\n {\n \"identifier\": \"org.ekstep.summary\",\n \"semanticVersion\": \"1.0\"\n }\n ],\n \"subject\": [\n \"CBSE Training\"\n ],\n \"channel\": \"0123221758376673287017\",\n \"downloadUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/ecar_files/do_313026415363981312122/vjjjj_1590142788385_do_313026415363981312122_1.0.ecar\",\n \"questions\": [\n {\n \"name\": \"Arrange the Given sentence \\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026448412631040111\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"Test data mark is 10\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026416457089024115\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"Arrange the given words in proper sentence\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026446885158912120\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"'The tree ____ is ____ and ____ mark is 3\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_313026423737737216117\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n },\n {\n \"name\": \"Match the following\\n\",\n \"relation\": \"associatedTo\",\n \"identifier\": \"do_31302641810772787218\",\n \"description\": null,\n \"objectType\": \"AssessmentItem\",\n \"status\": \"Live\"\n }\n ],\n \"organisation\": [\n \"EKSTEP\"\n ],\n \"language\": [\n \"English\"\n ],\n \"mimeType\": \"application/vnd.ekstep.ecml-archive\",\n \"variants\": {\n \"spine\": {\n \"ecarUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/ecar_files/do_313026415363981312122/vjjjj_1590142788484_do_313026415363981312122_1.0_spine.ecar\",\n \"size\": 44094\n }\n },\n \"editorState\": 
\"{\\\"plugin\\\":{\\\"noOfExtPlugins\\\":11,\\\"extPlugins\\\":[{\\\"plugin\\\":\\\"org.ekstep.contenteditorfunctions\\\",\\\"version\\\":\\\"1.2\\\"},{\\\"plugin\\\":\\\"org.ekstep.keyboardshortcuts\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.richtext\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.iterator\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.navigation\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.reviewercomments\\\",\\\"version\\\":\\\"1.0\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.mtf\\\",\\\"version\\\":\\\"1.2\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.mcq\\\",\\\"version\\\":\\\"1.3\\\"},{\\\"plugin\\\":\\\"org.ekstep.keyboard\\\",\\\"version\\\":\\\"1.1\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.reorder\\\",\\\"version\\\":\\\"1.1\\\"},{\\\"plugin\\\":\\\"org.ekstep.questionunit.sequence\\\",\\\"version\\\":\\\"1.1\\\"}]},\\\"stage\\\":{\\\"noOfStages\\\":1,\\\"currentStage\\\":\\\"371a0b37-e8ab-4e8e-b83e-3775a2ed927d\\\",\\\"selectedPluginObject\\\":\\\"49469445-2ca2-479d-95b7-360aa07ee3bd\\\"},\\\"sidebar\\\":{\\\"selectedMenu\\\":\\\"settings\\\"}}\",\n \"objectType\": \"Content\",\n \"gradeLevel\": [\n \"Class 1\"\n ],\n \"appIcon\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/do_313026415363981312122/artifact/10_1560927487910.thumb.jpg\",\n \"primaryCategory\": \"Course Assessment\",\n \"collections\": [\n {\n \"name\": \"Copy of SelfServiceable\",\n \"relation\": \"hasSequenceMember\",\n \"identifier\": \"do_313026450799894528150\",\n \"description\": \"Execution\",\n \"objectType\": \"Collection\",\n \"status\": \"Retired\"\n }\n ],\n \"appId\": \"prod.diksha.portal\",\n \"contentEncoding\": \"gzip\",\n \"artifactUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/do_313026415363981312122/artifact/1590142788151_do_313026415363981312122.zip\",\n \"lockKey\": \"0708a132-4774-4126-b2e5-df0e83f6dd40\",\n \"sYS_INTERNAL_LAST_UPDATED_ON\": \"2020-05-22T10:19:50.208+0000\",\n \"contentType\": \"SelfAssess\",\n \"lastUpdatedBy\": \"2dded955-5656-4dc2-a683-e7e42082aa8c\",\n \"identifier\": \"do_313026415363981312122\",\n \"audience\": [\n \"Teacher\"\n ],\n \"visibility\": \"Default\",\n \"consumerId\": \"e85bcfb5-a8c2-4e65-87a2-0ebb43b45f01\",\n \"mediaType\": \"content\",\n \"osId\": \"org.ekstep.quiz.app\",\n \"lastPublishedBy\": \"5c627fa2-a7a4-490f-b9c8-bc0f42559562\",\n \"languageCode\": [\n \"en\"\n ],\n \"version\": 2,\n \"license\": \"CC BY 4.0\",\n \"prevState\": \"Review\",\n \"size\": 547518,\n \"lastPublishedOn\": \"2020-05-22T10:19:48.378+0000\",\n \"name\": \"Vjjjj\",\n \"status\": \"Retired\",\n \"totalQuestions\": 2,\n \"code\": \"org.sunbird.lfSsbq\",\n \"prevStatus\": \"Processing\",\n \"description\": \"Enter description for Assessment\",\n \"streamingUrl\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/ecml/do_313026415363981312122-latest\",\n \"medium\": [\n \"English\"\n ],\n \"posterImage\": \"https://ntpproductionall.blob.core.windows.net/ntp-content-production/content/do_3127871179775098881294/artifact/10_1560927487910.jpg\",\n \"idealScreenSize\": \"normal\",\n \"createdOn\": \"2020-05-22T09:09:54.237+0000\",\n \"copyrightYear\": 2020,\n \"contentDisposition\": \"inline\",\n \"licenseterms\": \"By creating any type of content (resources, books, courses etc.) on DIKSHA, you consent to publish it under the Creative Commons License Framework. 
Please choose the applicable creative commons license you wish to apply to your content.\",\n \"lastUpdatedOn\": \"2020-05-22T11:59:41.479+0000\",\n \"dialcodeRequired\": \"No\",\n \"lastStatusChangedOn\": \"2020-05-22T11:59:41.479+0000\",\n \"createdFor\": [\n \"0123221758376673287017\"\n ],\n \"creator\": \"content_creator sahu\",\n \"os\": [\n \"All\"\n ],\n \"totalScore\": 5,\n \"pkgVersion\": 1,\n \"versionKey\": \"1590142785805\",\n \"idealScreenDensity\": \"hdpi\",\n \"framework\": \"ekstep_ncert_k-12\",\n \"s3Key\": \"ecar_files/do_313026415363981312122/vjjjj_1590142788385_do_313026415363981312122_1.0.ecar\",\n \"lastSubmittedOn\": \"2020-05-22T10:19:07.606+0000\",\n \"createdBy\": \"2dded955-5656-4dc2-a683-e7e42082aa8c\",\n \"compatibilityLevel\": 2,\n \"board\": \"CBSE\"\n }") val contentCacheData_2 = Map("do_313026415363981312122123" -> "{\"totalQuestions\":1}") val contentCacheList = List(contentCacheData_1, contentCacheData_2) -} +} \ No newline at end of file diff --git a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala index 171f7e4f71..3cea7b170b 100644 --- a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala +++ b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala @@ -27,6 +27,8 @@ import org.sunbird.dp.core.util.{CassandraUtil, JSONUtil} import org.sunbird.dp.fixture.EventFixture import org.sunbird.dp.{BaseMetricsReporter, BaseTestSpec} import redis.embedded.RedisServer +import okhttp3.mockwebserver.{MockResponse, MockWebServer} +import java.io.IOException class AssessmentAggregatorTaskTestSpec extends BaseTestSpec { @@ -44,6 +46,7 @@ class AssessmentAggregatorTaskTestSpec extends BaseTestSpec { val assessmentConfig: AssessmentAggregatorConfig = new AssessmentAggregatorConfig(config) val mockKafkaUtil: FlinkKafkaConnector = mock[FlinkKafkaConnector](Mockito.withSettings().serializable()) val gson = new Gson() + val server = new MockWebServer() var cassandraUtil: CassandraUtil = _ @@ -56,6 +59,7 @@ class AssessmentAggregatorTaskTestSpec extends BaseTestSpec { EmbeddedCassandraServerHelper.startEmbeddedCassandra(80000L) cassandraUtil = new CassandraUtil(assessmentConfig.dbHost, assessmentConfig.dbPort) val session = cassandraUtil.session + setupRestUtilData() setupRedisTestData() val dataLoader = new CQLDataLoader(session) @@ -72,6 +76,7 @@ class AssessmentAggregatorTaskTestSpec extends BaseTestSpec { redisServer.stop() try { EmbeddedCassandraServerHelper.cleanEmbeddedCassandra() + server.close() } catch { case ex: Exception => { @@ -80,6 +85,21 @@ class AssessmentAggregatorTaskTestSpec extends BaseTestSpec { flinkCluster.after() } + def setupRestUtilData(): Unit = { + val do_11307972307046400011917_response = """{"id":"api.content.read","ver":"1.0","ts":"2023-05-17T10:26:51.549Z","params":{"resmsgid":"566eacd0-f49d-11ed-bf1e-7fae1bdbcdf8","msgid":"566de980-f49d-11ed-8721-d532b5857c8a","status":"successful","err":null,"errmsg":null},"responseCode":"OK","result":{"content":{"ownershipType":["createdBy"],"copyright":"Sunbird 
Org","previewUrl":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/assets/do_113762457691021312168/samplevideo_1280x720_1mb.mp4","channel":"0137541424673095687","downloadUrl":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/do_113762457691021312168/content-2_1679987660391_do_113762457691021312168_1.ecar","organisation":["Sunbird Org"],"language":["English"],"mimeType":"video/mp4","variants":{"full":{"ecarUrl":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/do_113762457691021312168/content-2_1679987660391_do_113762457691021312168_1.ecar","size":"1058720"},"spine":{"ecarUrl":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/do_113762457691021312168/content-2_1679987660892_do_113762457691021312168_1_SPINE.ecar","size":"4153"}},"objectType":"Content","appIcon":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/do_113762457691021312168/artifact/do_11376182453272576019_1679910221428_287-2876925_test-image-png-unit-testing-png-transparent-png.thumb.png","primaryCategory":"Explanation Content","contentEncoding":"identity","artifactUrl":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/assets/do_113762457691021312168/samplevideo_1280x720_1mb.mp4","lockKey":"1a558750-41dd-43d3-9d4f-9f12184a902e","contentType":"Resource","category2":"Category2 Term1","identifier":"do_113762457691021312168","lastUpdatedBy":"155ce3c5-713e-4749-bc1c-95d09c640914","category3":"Category3 Term1","audience":["Student"],"category4":"Category4 Term1","category5":"Category5 Term1","visibility":"Default","category1":"Category1 Term1","discussionForum":{"enabled":"No"},"mediaType":"content","osId":"org.ekstep.quiz.app","languageCode":["en"],"lastPublishedBy":"469dc732-04f3-42d9-9a85-30957a797acc","version":2,"license":"CC BY 4.0","prevState":"Review","size":1055736,"lastPublishedOn":"2023-03-28T07:14:20.009+0000","name":"Content - 2","status":"Live","code":"62ada120-13c4-4e94-aad6-56cebe6a089c","interceptionPoints":{},"credentials":{"enabled":"No"},"prevStatus":"Processing","streamingUrl":"https://sunbirdspikemedia-inct.streaming.media.azure.net/5d2643e3-fcae-42a8-8a22-ac291a317ed4/samplevideo_1280x720_1mb.ism/manifest(format=m3u8-aapl-v3)","posterImage":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/do_11376182453272576019/artifact/do_11376182453272576019_1679910221428_287-2876925_test-image-png-unit-testing-png-transparent-png.png","idealScreenSize":"normal","createdOn":"2023-03-28T07:11:51.115+0000","contentDisposition":"inline","lastUpdatedOn":"2023-03-28T07:44:16.190+0000","dialcodeRequired":"No","lastStatusChangedOn":"2023-03-28T07:14:21.098+0000","createdFor":["0137541424673095687"],"creator":"contentCreator Creator","os":["All"],"se_FWIds":["NCF"],"pkgVersion":1,"versionKey":"1679989456190","idealScreenDensity":"hdpi","framework":"framework1","lastSubmittedOn":"2023-03-28T07:12:10.623+0000","createdBy":"155ce3c5-713e-4749-bc1c-95d09c640914","compatibilityLevel":1,"resourceType":"Learn"}}}""" + val do_1131998128479272961991_response = 
"""{"id":"api.content.read","ver":"1.0","ts":"2023-05-17T11:08:52.532Z","params":{"resmsgid":"350e2740-f4a3-11ed-bf1e-7fae1bdbcdf8","msgid":"350d15d0-f4a3-11ed-8721-d532b5857c8a","status":"successful","err":null,"errmsg":null},"responseCode":"OK","result":{"content":{"ownershipType":["createdBy"],"copyright":"sunbird","se_gradeLevelIds":["ncf_gradelevel_class1"],"subject":["Telugu"],"channel":"0137541424673095687","downloadUrl":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/do_11376180991508480011/test-book_1679910340314_do_11376180991508480011_1_SPINE.ecar","organisation":["Sunbird Org"],"language":["English"],"mimeType":"application/vnd.ekstep.content-collection","variants":{"spine":{"ecarUrl":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/do_11376180991508480011/test-book_1679910340314_do_11376180991508480011_1_SPINE.ecar","size":"8922"},"online":{"ecarUrl":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/do_11376180991508480011/test-book_1679910340572_do_11376180991508480011_1_ONLINE.ecar","size":"5179"}},"leafNodes":["do_11376182093890355216","do_11376182438513868818"],"objectType":"Content","se_mediums":["Telugu"],"gradeLevel":["Class 1"],"appIcon":"","primaryCategory":"Digital Textbook","contentEncoding":"gzip","lockKey":"fbeda787-5742-4c45-a535-fbb2c99ac3a0","generateDIALCodes":"Yes","totalCompressedSize":3579342,"mimeTypesCount":"{\"video/mp4\":2,\"application/vnd.ekstep.content-collection\":2}","sYS_INTERNAL_LAST_UPDATED_ON":"2023-03-27T09:45:40.314+0000","contentType":"TextBook","se_gradeLevels":["Class 1"],"trackable":{"enabled":"No","autoBatch":"No"},"identifier":"do_11376180991508480011","audience":["Student"],"se_boardIds":["ncf_board_other"],"subjectIds":["ncf_subject_telugu"],"toc_url":"https://sunbirddev.blob.core.windows.net/sunbird-content-dev/content/do_11376180991508480011/artifact/do_11376180991508480011_toc.json","visibility":"Default","contentTypesCount":"{\"TextBookUnit\":2,\"Resource\":2}","author":"BookCreator bookCreator","consumerId":"bfe5883f-ac66-4744-a064-3ed88d986eba","childNodes":["do_11376182093890355216","do_11376181820567552012","do_11376182438513868818","do_11376181820809216014"],"discussionForum":{"enabled":"No"},"mediaType":"content","osId":"org.ekstep.quiz.app","languageCode":["en"],"lastPublishedBy":"4b4dda54-b061-4346-9aaa-e2801430b885","version":2,"se_subjects":["Telugu"],"license":"CC BY 4.0","prevState":"Review","size":8922,"lastPublishedOn":"2023-03-27T09:45:40.159+0000","name":"Test Book","mediumIds":["ncf_medium_telugu"],"status":"Live","code":"org.sunbird.jMNK3Z","credentials":{"enabled":"No"},"prevStatus":"Processing","description":"Enter description for TextBook","medium":["Telugu"],"idealScreenSize":"normal","createdOn":"2023-03-27T09:13:56.965+0000","se_boards":["Other"],"se_mediumIds":["ncf_medium_telugu"],"copyrightYear":2023,"contentDisposition":"inline","lastUpdatedOn":"2023-03-27T09:45:40.820+0000","dialcodeRequired":"No","lastStatusChangedOn":"2023-03-27T09:45:40.820+0000","createdFor":["0137541424673095687"],"creator":"BookCreator 
bookCreator","os":["All"],"se_subjectIds":["ncf_subject_telugu"],"se_FWIds":["NCF"],"pkgVersion":1,"versionKey":"1679910320288","idealScreenDensity":"hdpi","framework":"NCF","depth":0,"s3Key":"content/do_11376180991508480011/artifact/do_11376180991508480011_toc.json","boardIds":["ncf_board_other"],"lastSubmittedOn":"2023-03-27T09:45:20.280+0000","createdBy":"7bf81b8b-ab64-47ca-b9d7-c9f74f811980","compatibilityLevel":1,"leafNodesCount":2,"userConsent":"Yes","gradeLevelIds":["ncf_gradelevel_class1"],"board":"Other","resourceType":"Book"}}}""" + try { + server.start(3000) + } catch { + case e: IOException => + System.out.println("Exception" + e) + } + server.enqueue(new MockResponse().setBody(do_11307972307046400011917_response)) + server.url("http://127.0.0.1:3000/api/content/v1/read/do_11307972307046400011917") + server.enqueue(new MockResponse().setBody(do_1131998128479272961991_response)) + server.url("http://127.0.0.1:3000/api/content/v1/read/do_1131998128479272961991") + } + "AssessmentAggregator " should "Update event to db" in { when(mockKafkaUtil.kafkaEventSource[Event](assessmentConfig.kafkaInputTopic)).thenReturn(new AssessmentAggreagatorEventSource) From 0f1f32c2d569f9791ce02f2917294a6d524a74e5 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Thu, 18 May 2023 10:20:01 +1000 Subject: [PATCH 173/203] testing ci build Signed-off-by: Deepak Devadathan --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9fcc736131..cb3eca673d 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ target/ *.iml **/.settings dependency-reduced-pom.xml +.vscode From 877620743ffe2f6f385b69e2eecd95b118a4eb7b Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 19 May 2023 13:48:00 +1000 Subject: [PATCH 174/203] added the varible for sslmode for postgres Signed-off-by: Deepak Devadathan --- ansible/inventory/env/group_vars/all.yml | 4 ++++ kubernetes/helm_charts/datapipeline_jobs/values.j2 | 1 + 2 files changed, 5 insertions(+) diff --git a/ansible/inventory/env/group_vars/all.yml b/ansible/inventory/env/group_vars/all.yml index 13114d01d9..67fc9d644c 100644 --- a/ansible/inventory/env/group_vars/all.yml +++ b/ansible/inventory/env/group_vars/all.yml @@ -76,6 +76,10 @@ postgres: db_port: 5432 db_admin_user: analytics db_admin_password: "{{dp_vault_pgdb_admin_password}}" + dp_db_name: analytics + dp_ssl_mode: require + dp_ssl: false + dp_sslfactory: org.postgresql.ssl.NonValidatingFactory postgres_address_space: 0.0.0.0/0 # Postgres trust address space diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 4122f9ea07..6c71b93872 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -194,6 +194,7 @@ base_config: | host = {{ postgres.db_url }} port = {{ postgres.db_port }} maxConnections = {{ postgres_max_connections }} + sslMode = {{ postgres.dp_ssl }} user = "{{ postgres.db_username }}" password = "{{ postgres.db_password }}" } From a33d71604722973fddc7cfb0ded3658a2cefc699 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 24 May 2023 10:06:20 +1000 Subject: [PATCH 175/203] changing the document overlay structure Signed-off-by: Deepak Devadathan --- .../secor/config/secor.common.properties | 12 ++-- .../secor/config/secor.partition.properties | 7 ++- .../helm_charts/secor/config/secor.properties | 2 +- .../secor/config/secor.s3.properties | 57 +++++++++++++++++++ 4 files changed, 69 insertions(+), 9 
deletions(-) create mode 100644 kubernetes/helm_charts/secor/config/secor.s3.properties diff --git a/kubernetes/helm_charts/secor/config/secor.common.properties b/kubernetes/helm_charts/secor/config/secor.common.properties index ebe2bb7d26..fc207e660c 100644 --- a/kubernetes/helm_charts/secor/config/secor.common.properties +++ b/kubernetes/helm_charts/secor/config/secor.common.properties @@ -27,8 +27,8 @@ cloud.service={{ $.Values.storage_type }} # AWS authentication credentials. # Leave empty if using IAM role-based authentication with s3a filesystem. -aws.access.key={{ $.Values.s3_access_key }} -aws.secret.key={{ $.Values.s3_secret_id }} +aws.access.key= +aws.secret.key= aws.role= # Optional Proxy Setting. Set to true to enable proxy @@ -51,12 +51,12 @@ aws.proxy.http.port= # secor.upload.manager.class. # # http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region -aws.region={{ $.Values.s3_region }} -aws.endpoint={{ $.Values.s3_endpoint }} +aws.region= +aws.endpoint= # Toggle the AWS S3 client between virtual host style access and path style # access. See http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html -aws.client.pathstyleaccess={{ $.Values.s3_path_style_access }} +aws.client.pathstyleaccess= ########################### # START AWS S3 ENCRYPTION # @@ -358,7 +358,7 @@ secor.max.message.size.bytes=100000 # Class that will manage uploads. Default is to use the hadoop # interface to S3. # secor.upload.manager.class=com.pinterest.secor.uploader.AzureUploadManager -secor.upload.manager.class=com.pinterest.secor.uploader.S3UploadManager +secor.upload.manager.class= #Set below property to your timezone, and the events will be parsed and converted to the timezone specified secor.message.timezone=UTC diff --git a/kubernetes/helm_charts/secor/config/secor.partition.properties b/kubernetes/helm_charts/secor/config/secor.partition.properties index cbcc742081..f14c523a5c 100644 --- a/kubernetes/helm_charts/secor/config/secor.partition.properties +++ b/kubernetes/helm_charts/secor/config/secor.partition.properties @@ -17,6 +17,9 @@ include=secor.properties {{- if eq .Values.storage_type "Azure" }} include=secor.azure.properties {{- end }} +{{- if eq .Values.storage_type "S3" }} +include=secor.s3.properties +{{- end }} # Name of the Kafka consumer group. secor.kafka.group={{ get (get $.Values.secor_jobs $.Release.Name) "consumer_group" }} @@ -25,7 +28,7 @@ secor.kafka.group={{ get (get $.Values.secor_jobs $.Release.Name) "consumer_grou secor.message.parser.class={{ get (get $.Values.secor_jobs $.Release.Name) "message_parser" }} # S3 path where sequence files are stored. -secor.s3.path={{- get (get $.Values.secor_jobs $.Release.Name) "base_path" }} +secor.s3.path= # Swift path where sequence files are stored. 
secor.swift.path=secor_dev/partition @@ -64,6 +67,6 @@ secor.max.file.age.policy=oldest # currentTime - Time of upload in HH-mm format # currentDate - Time of upload in YYYYMMDD format # folder - Folder to use based on message id map lookup -secor.s3.output_file_pattern={{ get (get $.Values.secor_jobs $.Release.Name) "output_file_pattern" }} +secor.s3.output_file_pattern= secor.partition.message.channel.identifier={{ get (get $.Values.secor_jobs $.Release.Name) "message_channel_identifier" }} diff --git a/kubernetes/helm_charts/secor/config/secor.properties b/kubernetes/helm_charts/secor/config/secor.properties index 4a724a051a..6f2876d1de 100644 --- a/kubernetes/helm_charts/secor/config/secor.properties +++ b/kubernetes/helm_charts/secor/config/secor.properties @@ -10,7 +10,7 @@ include=secor.common.properties ############### # Name of the s3 bucket where log files are stored. -secor.s3.bucket={{ $.Values.s3_bucket_name }} +secor.s3.bucket= ############### # Using Swift # diff --git a/kubernetes/helm_charts/secor/config/secor.s3.properties b/kubernetes/helm_charts/secor/config/secor.s3.properties new file mode 100644 index 0000000000..e52e372f50 --- /dev/null +++ b/kubernetes/helm_charts/secor/config/secor.s3.properties @@ -0,0 +1,57 @@ +include=secor.properties + + + +############ +# MUST SET # +############ + +# Name of the s3 bucket where log files are stored. +secor.s3.bucket={{ $.Values.s3_bucket_name }} + +# AWS authentication credentials. +# Leave empty if using IAM role-based authentication with s3a filesystem. +aws.access.key={{ $.Values.s3_access_key }} +aws.secret.key={{ $.Values.s3_secret_id }} + +# AWS region or endpoint. region should be a known region name (eg. +# us-east-1). endpoint should be a known S3 endpoint url. If neither +# are specified, then the default region (us-east-1) is used. If both +# are specified then endpoint is used. +# +# Only apply if the the S3UploadManager is used - see +# secor.upload.manager.class. +# +# http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region +aws.region={{ $.Values.s3_region }} +aws.endpoint={{ $.Values.s3_endpoint }} + +# Toggle the AWS S3 client between virtual host style access and path style +# access. See http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html +aws.client.pathstyleaccess={{ $.Values.s3_path_style_access }} + +# Class that will manage uploads. Default is to use the hadoop +# interface to S3. +# secor.upload.manager.class=com.pinterest.secor.uploader.AzureUploadManager +secor.upload.manager.class=com.pinterest.secor.uploader.S3UploadManager + +# S3 path where sequence files are stored. +secor.s3.path={{- get (get $.Values.secor_jobs $.Release.Name) "base_path" }} + +# Output file pattern excluding prefix. Defaults to topic/partition/generation_kafkaPartition_fmOffset.gz. +# Available placeholders are +# topic - The topic name the data is being fetched +# partition - The partition name +# generation - Generation +# kafkaPartition - The kafka partition +# fmOffset - First Message offset in the file. 
+# randomHex - A 4 character random hex to append to the file name +# currentTimestamp - Time of upload in epoch format +# currentTime - Time of upload in HH-mm format +# currentDate - Time of upload in YYYYMMDD format +# folder - Folder to use based on message id map lookup +secor.s3.output_file_pattern={{ get (get $.Values.secor_jobs $.Release.Name) "output_file_pattern" }} + +################ +# END MUST SET # +################ \ No newline at end of file From 2a22b1982d27d734ed6d9960e8a0130035379703 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Wed, 24 May 2023 11:18:08 +1000 Subject: [PATCH 176/203] removed virtualenv_python version Ubuntu 22.04 ships with Python 3.10 by default, and virtual environment creation always fails when a virtualenv_python value is explicitly passed to the pip task in Ansible. As per the Ansible documentation (https://docs.ansible.com/ansible/latest/collections/ansible/builtin/pip_module.html), virtualenv_python: The Python executable used for creating the virtual environment. For example python3.5, python2.7. When not specified, the Python version used to run the ansible module is used. Signed-off-by: Deepak Devadathan --- ansible/roles/portal-dashboard/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/portal-dashboard/tasks/main.yml b/ansible/roles/portal-dashboard/tasks/main.yml index adfbd76b39..7c53a72307 100644 --- a/ansible/roles/portal-dashboard/tasks/main.yml +++ b/ansible/roles/portal-dashboard/tasks/main.yml @@ -12,7 +12,7 @@ pip: name: "{{library_path}}" virtualenv: "{{ virtualenv_path }}" - virtualenv_python: "python3.6" + # virtualenv_python: "python3.6" tags: - common From 150cd65053834a1feeb92eb81cfaa3605d9b8200 Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Fri, 11 Aug 2023 10:10:27 +1000 Subject: [PATCH 177/203] spark provisioning version 3.2.1 Signed-off-by: Deepak Devadathan --- ansible/roles/analytics-spark-provision/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/analytics-spark-provision/defaults/main.yml b/ansible/roles/analytics-spark-provision/defaults/main.yml index ef986228fe..211de20b11 100644 --- a/ansible/roles/analytics-spark-provision/defaults/main.yml +++ b/ansible/roles/analytics-spark-provision/defaults/main.yml @@ -3,7 +3,7 @@ analytics: soft_path: /mount/data/analytics base_path: /home/analytics scala_version: 2.12.10 -spark_version: 3.1.3 +spark_version: 3.2.1 model_version: "2.0" spark: home: "{{ analytics.home }}/spark-{{ spark_version }}-bin-hadoop2.7" From c62af7bb1e8a07e07f1fb356f0889de287e4bbaa Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 17 Oct 2023 10:21:23 +1100 Subject: [PATCH 178/203] removed unwanted file Signed-off-by: Deepak Devadathan --- .../provision/spark/Jenkinsfile.bds.test | 60 ------------------- 1 file changed, 60 deletions(-) delete mode 100644 pipelines/provision/spark/Jenkinsfile.bds.test diff --git a/pipelines/provision/spark/Jenkinsfile.bds.test b/pipelines/provision/spark/Jenkinsfile.bds.test deleted file mode 100644 index bd6de3ad34..0000000000 --- a/pipelines/provision/spark/Jenkinsfile.bds.test +++ /dev/null @@ -1,60 +0,0 @@ - -@Library('deploy-conf') _ -node('build-slave') { - try { - String ANSI_GREEN = "\u001B[32m" - String ANSI_NORMAL = "\u001B[0m" - String ANSI_BOLD = "\u001B[1m" - String ANSI_RED = "\u001B[31m" - String ANSI_YELLOW = "\u001B[33m" - - ansiColor('xterm') { - stage('Checkout') { - checkout scm - } - - stage('copy cluster creation script') {
values = [:] - envDir = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-3].trim() - module = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-2].trim() - jobName = sh(returnStdout: true, script: "echo $JOB_NAME").split('/')[-1].trim() - currentWs = sh(returnStdout: true, script: 'pwd').trim() - ansiblePlaybook = "${currentWs}/ansible/oci-bds-spark.provision.yml" - ansibleExtraArgs = "--extra-vars \"compartment_id=${params.compartment_id} ambari_user=${params.ambari_user} cluster_password=${params.cluster_password} key_alias=${params.key_alias} user_id=${params.user_id} subnet_id=${params.subnet_id} display_name=${params.display_name} workernode=${params.workernode} public_key=${params.public_key} cluster_state=${params.type}\" --tags copy-script,copy-creation-script --vault-password-file /var/lib/jenkins/secrets/vault-pass" - values.put('currentWs', currentWs) - values.put('env', envDir) - values.put('module', module) - values.put('jobName', jobName) - values.put('ansiblePlaybook', ansiblePlaybook) - values.put('ansibleExtraArgs', ansibleExtraArgs) - println values - ansible_playbook_run(values) - } - stage('create and provision spark OCI BDS') { - oci_namespace=params.oci_namespace - bds-livy-node-ip=params.bds-livy-node-ip - sh ''' - currentws=$(pwd) - ansibleplaybook="$currentws/ansible/oci-bds-spark.provision.yml" - cd /tmp - #./create_cluster_bds.sh - - export inventory_dir=/var/lib/jenkins/workspace/Provision/dev/DataPipeline/__SparkBDSCluster3/ansible/inventory/env - echo "" >> $inventory_dir/hosts - echo "[bds-livy-node]" >> $inventory_dir/hosts - echo "$bds-livy-node-ip ansible_ssh_user=opc" >> $inventory_dir/hosts - echo "" >> $inventory_dir/hosts - - ANSIBLE_HOST_KEY_CHECKING=False - ansible-playbook -i $currentws/ansible/inventory/env/hosts $ansibleplaybook --extra-vars "oci_namespace=$oci_namespace" --tags spark-provision --vault-password-file /var/lib/jenkins/secrets/vault-pass - ''' - } - - } - } - catch (err) { - currentBuild.result = "FAILURE" - throw err - } - -} From 7e5427b205c0b5265247c4545344a04455b9b32b Mon Sep 17 00:00:00 2001 From: Deepak Devadathan Date: Tue, 17 Oct 2023 16:00:19 +1100 Subject: [PATCH 179/203] removed unwanted oci cli steps Signed-off-by: Deepak Devadathan --- .../analytics-bootstrap-spark/tasks/main.yml | 21 +------------------ .../templates/oci-cli-config.j2 | 6 ------ .../templates/oci-key.j2 | 1 - 3 files changed, 1 insertion(+), 27 deletions(-) delete mode 100644 ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2 delete mode 100644 ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2 diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml index 663f76d68a..59fc11c8b0 100644 --- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml +++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml @@ -21,26 +21,6 @@ regexp: "export PATH={{ analytics_user_home }}/bin.*" when: cloud_service_provider == "oci" -- name: Configure OCI cli - become: yes - become_user: "{{ analytics_user }}" - file: - path: "{{ analytics_user_home }}/.oci" - state: directory - when: cloud_service_provider == "oci" - -- name: Create OCI cli config location - become: yes - become_user: "{{ analytics_user }}" - template: src=oci-key.j2 dest={{ analytics_user_home }}/.oci/oci-key.pem mode=600 owner={{ analytics_user }} group={{ analytics_group }} - when: cloud_service_provider == "oci" - -- name: Create OCI cli config file - become: yes - become_user: "{{ 
analytics_user }}" - template: src=oci-cli-config.j2 dest={{ analytics_user_home }}/.oci/config mode=600 owner={{ analytics_user }} group={{ analytics_group }} - when: cloud_service_provider == "oci" - - name: Adding ENV Vars to spark servers environment. become: yes lineinfile: @@ -61,6 +41,7 @@ - {var: 'STORAGE_PROVIDER', value: 'AZURE'} - {var: 'ENV', value: '{{env}}'} - {var: 'KAFKA_BROKER_HOST', value: "{{groups['processing-cluster-kafka'][0]}}:9092"} + - {var: 'OCI_CLI_AUTH', value: "instance_principal"} - name: Install required python packages become: yes diff --git a/ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2 b/ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2 deleted file mode 100644 index 56cf3ba3ef..0000000000 --- a/ansible/roles/analytics-bootstrap-spark/templates/oci-cli-config.j2 +++ /dev/null @@ -1,6 +0,0 @@ -[DEFAULT] -user={{oci_cli_user_ocid }} -fingerprint={{oci_cli_fingerprint}} -key_file=/home/analytics/.oci/oci-key.pem -tenancy={{oci_cli_tenancy}} -region={{oci_cli_region}} \ No newline at end of file diff --git a/ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2 b/ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2 deleted file mode 100644 index b969594016..0000000000 --- a/ansible/roles/analytics-bootstrap-spark/templates/oci-key.j2 +++ /dev/null @@ -1 +0,0 @@ -{{ oci_cli_key_content }} \ No newline at end of file From 850ffac018f35c9e52bf11701d338469e16ade67 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Thu, 17 Nov 2022 15:51:57 +0530 Subject: [PATCH 180/203] Added cloud storage services AWS and GCP --- .../templates/job-cluster-jobmanager.yaml | 16 +++++++++++- .../templates/job-cluster-taskmanager.yaml | 13 ++++++++++ .../datapipeline/flink-jobs/values.j2 | 8 ++++++ .../templates/cluster-config.json.j2 | 25 ++++++++++++++++++- .../templates/flink_job_deployment.yaml | 10 ++++++++ .../helm_charts/datapipeline_jobs/values.j2 | 3 +++ 6 files changed, 73 insertions(+), 2 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index 1e7c5e2778..91534a85d5 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -20,7 +20,8 @@ spec: imagePullPolicy: Always workingDir: /opt/flink command: ["/opt/flink/bin/standalone-job.sh"] -{{- $job-config-key := .Release.Name }} +#{{- $job-config-key := .Release.Name }} + args: ["start-foreground", "--job-classname={{ index .Values $job-config-key.job_classname }}", "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", @@ -29,7 +30,20 @@ spec: "-Dblob.server.port=6124", "-Dqueryable-state.server.ports=6125", "-Djobmanager.heap.size={{ index .Values $job-config-key.job_manager_heap_size }}", +{{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "s3" }} + "-Ds3.access-key={{ .Values.s3_access_key }}", + "-Ds3.secret-key={{ .Values.s3_secret_key }}", + "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "gcloud" }} + "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + 
"-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" +{{- end }} "-Dconfig.file=/opt/flink/conf/{{ .Release.Name }}.conf"] ports: - containerPort: 6123 diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml index cad7c3f47c..348bdd66e4 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml @@ -29,7 +29,20 @@ spec: "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", "-Dtaskmanager.rpc.port=6122", "-Dtaskmanager.heap.size={{ index .Values $job-config-key.task_manager_heap_size }}", +{{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "s3" }} + "-Ds3.access-key={{ .Values.s3_access_key }}", + "-Ds3.secret-key={{ .Values.s3_secret_key }}", + "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "gcloud" }} + "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}", +{{- end }} "-Dconfig.file=/opt/flink/conf/{{ .Release.Name }}.conf"] ports: - containerPort: 6122 diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index fd34c8c647..7d3bb58f8a 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -8,6 +8,14 @@ image_tag: {{ image_tag }} azure_storage_account={{ sunbird_private_storage_account_name }} azure_storage_secret={{ sunbird_private_storage_account_key }} +s3_access_key: {{ s3_storage_key }} +s3_secret_key: {{ s3_storage_secret }} +s3_endpoint: {{ s3_storage_endpoint }} +s3_path_style_access: {{ s3_path_style_access }} +gcloud_client_key: {{ gcloud_client_key }} +gcloud_private_secret: {{ gcloud_private_secret }} +gcloud_project_id: {{ gcloud_project_id }} + telemetry-extractor: job_name=telemetry-extractor diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 0c3ba9b886..4140254f32 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -2,13 +2,36 @@ {% if dp_object_store_type == "azure" %} { "jars": [ + {% if checkpoint_store_type == "azure" %} "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + {% elif checkpoint_store_type == "s3" %} + "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "s3://{{ bucket }}/models-{{ 
model_version }}/{{ scruid_artifact }}", + "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + {% elif checkpoint_store_type == "gcloud" %} + "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "gs://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", + "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + {% endif %} ], - "file": "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "file": + {% if checkpoint_store_type == "azure" %} + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + {% elif checkpoint_store_type == "s3" %} + "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + {% elif checkpoint_store_type == "gcloud" %} + "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + {% endif %} "files": [ + {% if checkpoint_store_type == "azure" %} "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/application.conf" + {% elif checkpoint_store_type == "s3" %} + "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", + {% elif checkpoint_store_type == "gcloud" %} + "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", + {% endif %} ], "className": "org.ekstep.analytics.job.JobExecutor", "executorCores": {{ spark_cluster.executor_core }}, diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index 10e6b62181..a103a13788 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -118,6 +118,11 @@ spec: "-Ds3.secret-key={{ .Values.s3_secret_key }}", "-Ds3.endpoint={{ .Values.s3_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "gcloud" }} + "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", @@ -199,6 +204,11 @@ spec: "-Ds3.secret-key={{ .Values.s3_secret_key }}", "-Ds3.endpoint={{ .Values.s3_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "gcloud" }} + "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 6c71b93872..e49b4c2d63 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -15,6 +15,9 @@ s3_endpoint: {{ s3_storage_endpoint }} {% endif %} s3_path_style_access: {{ 
s3_path_style_access }} +gcloud_client_key: {{ gcloud_client_key }} +gcloud_private_secret: {{ gcloud_private_secret }} +gcloud_project_id: {{ gcloud_project_id }} serviceMonitor: enabled: {{ service_monitor_enabled | lower}} From 3fa4da92607812d6554ca802153a5576dd8b2f09 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Thu, 17 Nov 2022 15:58:11 +0530 Subject: [PATCH 181/203] Added cloud storage services AWS and GCP --- .../flink-jobs/templates/job-cluster-jobmanager.yaml | 2 +- .../datapipeline_jobs/templates/flink_job_deployment.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index 91534a85d5..c2d4918111 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -20,7 +20,7 @@ spec: imagePullPolicy: Always workingDir: /opt/flink command: ["/opt/flink/bin/standalone-job.sh"] -#{{- $job-config-key := .Release.Name }} +{{- $job-config-key := .Release.Name }} args: ["start-foreground", "--job-classname={{ index .Values $job-config-key.job_classname }}", diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index a103a13788..ab76362787 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -206,9 +206,9 @@ spec: "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" + "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", From 63679d5f069d675f1bfcd2ab6abeb74a8a6a5c03 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Tue, 29 Nov 2022 03:36:41 +0530 Subject: [PATCH 182/203] updated common vars Update oci changes --- ansible/roles/data-products-deploy/templates/common.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index c9a3ba285e..45bd330139 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -314,4 +314,4 @@ uci.fushionauth.postgres.pass="{{ uci_postgres.fushionauth_db_psss }}" uci.exhaust.store.prefix="" uci.encryption.secret="{{ uci_encryption_secret_key }}" -// END OF UCI Related Job Configs \ No newline at end of file +// END OF UCI Related Job Configs From 9d365682786d46c8a4137c4c1c31fab32550aaa9 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Thu, 15 Dec 2022 17:41:38 +0530 Subject: [PATCH 183/203] common variable changes in data-pipeline Update oci changes --- .../templates/job-cluster-jobmanager.yaml | 14 ++++---- .../datapipeline/flink-jobs/values.j2 | 14 
+++----- .../templates/cluster-config.json.j2 | 32 +++++++++---------- .../templates/common.conf.j2 | 2 -- .../templates/flink_job_deployment.yaml | 28 ++++++++-------- .../helm_charts/datapipeline_jobs/values.j2 | 6 +--- 6 files changed, 43 insertions(+), 53 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index c2d4918111..618ab35706 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -31,18 +31,18 @@ spec: "-Dqueryable-state.server.ports=6125", "-Djobmanager.heap.size={{ index .Values $job-config-key.job_manager_heap_size }}", {{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", + "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net: {{ .Values.cloud_storage_secret }}", {{- end }} {{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.access-key={{ .Values.cloud_storage_key }}", + "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", + "-Ds3.endpoint={{ .Values.cloud_public_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" + "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" {{- end }} "-Dconfig.file=/opt/flink/conf/{{ .Release.Name }}.conf"] ports: diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index 7d3bb58f8a..6c06f450f8 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -6,16 +6,12 @@ dockerhub: {{ dockerhub }} repository: {{ datapipeline_repository|default('data-pipeline') }} image_tag: {{ image_tag }} -azure_storage_account={{ sunbird_private_storage_account_name }} -azure_storage_secret={{ sunbird_private_storage_account_key }} -s3_access_key: {{ s3_storage_key }} -s3_secret_key: {{ s3_storage_secret }} -s3_endpoint: {{ s3_storage_endpoint }} +checkpoint_store_type: {{ cloud_storage_type }} //Need to check +cloud_storage_key: {{ cloud_public_storage_accountname }} +cloud_storage_secret: {{ cloud_public_storage_secret }} +cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} s3_path_style_access: {{ s3_path_style_access }} -gcloud_client_key: {{ gcloud_client_key }} -gcloud_private_secret: {{ gcloud_private_secret }} -gcloud_project_id: {{ gcloud_project_id }} - +cloud_storage_project_id: {{ cloud_public_storage_project }} telemetry-extractor: job_name=telemetry-extractor diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 4140254f32..7cfa9e7bdc 100644 --- 
a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -2,34 +2,34 @@ {% if dp_object_store_type == "azure" %} { "jars": [ - {% if checkpoint_store_type == "azure" %} - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - {% elif checkpoint_store_type == "s3" %} + {% if cloud_storage_type == "azure" %} + "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", + "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", + "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + {% elif cloud_storage_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "s3://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - {% elif checkpoint_store_type == "gcloud" %} + {% elif cloud_storage_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "gs://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" {% endif %} ], "file": - {% if checkpoint_store_type == "azure" %} - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - {% elif checkpoint_store_type == "s3" %} + {% if cloud_storage_type == "azure" %} + "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + {% elif cloud_storage_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - {% elif checkpoint_store_type == "gcloud" %} + {% elif cloud_storage_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", {% endif %} "files": [ - {% if checkpoint_store_type == "azure" %} - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/application.conf" - {% elif checkpoint_store_type == "s3" %} + {% if cloud_storage_type == "azure" %} + "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/application.conf" + {% elif cloud_storage_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", - {% elif checkpoint_store_type == "gcloud" %} + {% elif cloud_storage_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", {% endif %} ], @@ -46,8 +46,8 @@ "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", 
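 {# Illustrative sketch, not from the patch: with the renames in this hunk, the
    azure branch of this template would render a jar path roughly like
    wasbs://telemetry-data@devprivate.blob.core.windows.net/models-2.0/analytics-framework-2.0.jar
    where telemetry-data, devprivate and 2.0 stand in for bucket,
    cloud_private_storage_accountname and model_version; all three sample
    values are hypothetical. #}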
"spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ 
sunbird_private_storage_account_key }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } {% elif (dp_object_store_type == "s3") %} diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index 45bd330139..307d357b52 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -8,8 +8,6 @@ service.search.path="{{ service.search.path }}" spark.cassandra.connection.host="{{groups['dp-cassandra'][0]}}" cassandra.keyspace_prefix="{{ cassandra_keyspace_prefix }}" cassandra.hierarchy_store_prefix="{{ cassandra_hierarchy_store_prefix }}" - - storage.key.config="{{ dp_storage_key_config }}" storage.secret.config="{{ dp_storage_secret_config }}" reports.storage.key.config="{{ dp_reports_storage_key_config }}" diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index ab76362787..fb3cb0dee8 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -111,18 +111,18 @@ spec: args: ["start-foreground", "--job-classname={{ .Values.job_classname }}", {{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", + "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net={{ .Values.cloud_storage_secret }}", {{- end }} {{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.access-key={{ .Values.cloud_storage_key }}", + "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", + "-Ds3.endpoint={{ .Values.cloud_storage_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" + "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", @@ -197,18 +197,18 @@ spec: command: ["/opt/flink/bin/taskmanager.sh"] args: ["start-foreground", {{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", + "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net={{ .Values.cloud_storage_secret }}", {{- end }} {{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.access-key={{ .Values.cloud_storage_key }}", + "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", + "-Ds3.endpoint={{ 
.Values.cloud_storage_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" + "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index e49b4c2d63..6d2a10f88e 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -13,12 +13,8 @@ s3_endpoint: {{ oci_flink_s3_storage_endpoint }} {% else %} s3_endpoint: {{ s3_storage_endpoint }} {% endif %} - s3_path_style_access: {{ s3_path_style_access }} -gcloud_client_key: {{ gcloud_client_key }} -gcloud_private_secret: {{ gcloud_private_secret }} -gcloud_project_id: {{ gcloud_project_id }} - +cloud_storage_project_id: {{ cloud_public_storage_project }} serviceMonitor: enabled: {{ service_monitor_enabled | lower}} replicaCount: {{taskmanager_replicacount|default(1)}} From 75b971dc8f3f4a91ab30576dc133d9ce0fa4ef7c Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 19 Dec 2022 15:38:16 +0530 Subject: [PATCH 184/203] updated secor csp variables --- ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml b/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml index b8cd06675e..07c02ac1c6 100644 --- a/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml +++ b/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml @@ -2,8 +2,8 @@ analytics_user: analytics analytics_group: analytics analytics_user_home: /home/{{analytics_user}} sbin_path: "{{ analytics_user_home }}/sbin" -azure_container_name: "{{secor_azure_container_name}}" -azure_account_key: "{{sunbird_private_storage_account_key}}" +cloud_storage_telemetry_bucketname: "{{secor_azure_container_name}}" +cloud_private_storage_secret: "{{sunbird_private_storage_account_key}}" telemetry_ingestion_topic: "{{ env }}.telemetry.ingest" From 6df2fa05ee24eda48b6865571c7ca9c117cdfa3f Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 19 Dec 2022 15:42:11 +0530 Subject: [PATCH 185/203] updated analytics spark csp variables --- .../roles/analytics-spark-provision/templates/spark-env.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 index dea6e5ad06..53bf3c3888 100644 --- a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 +++ b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 @@ -72,8 +72,8 @@ export SPARK_EXECUTOR_MEMORY={{ spark.executor.memory }} export SPARK_PUBLIC_DNS="{{ spark.public_dns }}" export reports_storage_key={{sunbird_private_storage_account_name}} export reports_storage_secret={{sunbird_private_storage_account_key}} -export azure_storage_key={{sunbird_private_storage_account_name}} -export azure_storage_secret={{sunbird_private_storage_account_key}} +export 
cloud_private_storage_accountname={{sunbird_private_storage_account_name}} +export cloud_private_storage_secret={{sunbird_private_storage_account_key}} export druid_storage_account_key={{sunbird_public_storage_account_name}} export druid_storage_account_secret={{sunbird_public_storage_account_key}} export aws_storage_key={{ s3_storage_key }} From a599296f437975eef5604c52814057aacb631e1c Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 19 Dec 2022 16:01:45 +0530 Subject: [PATCH 186/203] common variable changes in datapipeline/flink-jobs --- .../flink-jobs/templates/job-cluster-jobmanager.yaml | 2 +- .../flink-jobs/templates/job-cluster-taskmanager.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index 618ab35706..6b25ab6c53 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -33,7 +33,7 @@ spec: {{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net: {{ .Values.cloud_storage_secret }}", {{- end }} -{{- if eq .Values.checkpoint_store_type "s3" }} +{{- if eq .Values.checkpoint_store_type "aws" }} "-Ds3.access-key={{ .Values.cloud_storage_key }}", "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", "-Ds3.endpoint={{ .Values.cloud_public_endpoint }}", diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml index 348bdd66e4..ecf59608db 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml @@ -32,7 +32,7 @@ spec: {{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", {{- end }} -{{- if eq .Values.checkpoint_store_type "s3" }} +{{- if eq .Values.checkpoint_store_type "aws" }} "-Ds3.access-key={{ .Values.s3_access_key }}", "-Ds3.secret-key={{ .Values.s3_secret_key }}", "-Ds3.endpoint={{ .Values.s3_endpoint }}", From 93e5207cda7569c15d026242437092ea93f475a5 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Wed, 21 Dec 2022 11:28:12 +0530 Subject: [PATCH 187/203] common variable changes in datapipeline/flink-jobs Updated the oci changes --- .../datapipeline/flink-jobs/values.j2 | 4 +- .../roles/analytics-druid/defaults/main.yml | 44 +++++++++---------- .../templates/cluster-config.json.j2 | 18 ++++---- .../templates/common.conf.j2 | 8 ++++ 4 files changed, 41 insertions(+), 33 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index 6c06f450f8..f03a3f2459 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -6,11 +6,11 @@ dockerhub: {{ dockerhub }} repository: {{ datapipeline_repository|default('data-pipeline') }} image_tag: {{ image_tag }} -checkpoint_store_type: {{ cloud_storage_type }} //Need to check +checkpoint_store_type: {{ 
cloud_service_provider }} cloud_storage_key: {{ cloud_public_storage_accountname }} cloud_storage_secret: {{ cloud_public_storage_secret }} cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} -s3_path_style_access: {{ s3_path_style_access }} +s3_path_style_access: {{ cloud_storage_path_style_access }} cloud_storage_project_id: {{ cloud_public_storage_project }} telemetry-extractor: diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index 8a55bc3f97..a22a3b9c0e 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -31,7 +31,7 @@ druid_request_logging_type: "file" #Druid Extensions -druid_storage_type: "azure" +druid_storage_type: {{ cloud_storage_telemetry_type }} druid_extensions_list : '"druid-azure-extensions", "graphite-emitter", "postgresql-metadata-storage", "druid-kafka-indexing-service", "druid-datasketches"' @@ -40,7 +40,7 @@ druid_community_extensions: # End of druid_extensions -druid_indexing_logs_type: azure +druid_indexing_logs_type: {{ cloud_storage_telemetry_type }} druid_indexing_log_dir: /var/druid/indexing-logs druid_indexing_storage_type : metadata druid_indexing_task_basedir : "/var/task" @@ -126,23 +126,23 @@ default_druid_configs: druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" #Druid Azure Details druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ sunbird_druid_storage_account_name }}" - azure_storage_secret: "{{ sunbird_druid_storage_account_key }}" - azure_container: "{{ druid_azure_container_name }}" + azure_account_name: "{{ cloud_public_storage_accountname }}" + azure_storage_secret: "{{ cloud_public_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" #Logging the indexing logs to azure - druid_log_azure_container: "{{ druid_azure_container_name }}" + druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" druid_log_azure_folder: "druidlogs" #Druid S3 Details - druid_storage_type: "{{ druid_storage_type }}" - s3_access_key: "{{ s3_storage_key }}" - s3_secret_key: "{{ s3_storage_secret }}" - s3_bucket: "{{ s3_storage_container }}" - s3_endpoint: "{{ s3_storage_endpoint }}" + druid_storage_type: "{{ cloud_storage_telemetry_type }}" + s3_access_key: "{{ cloud_public_storage_accountname }}" + s3_secret_key: "{{ cloud_public_storage_secret }}" + s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" + s3_endpoint: "{{ cloud_public_storage_endpoint }}" s3_segment_dir: "druid/raw/segments" - s3_path_like_access: "{{ s3_path_style_access }}" - s3_v4_sign_region: "{{ s3_default_bucket_location }}" + s3_path_like_access: "{{ cloud_storage_path_style_access }}" + s3_v4_sign_region: "{{ cloud_public_storage_region }}" #Logging the indexing logs to s3 - s3_logging_bucket: "{{ s3_storage_container }}" + s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" #Druid coordinator node configuration druid_coordinator_heap_size: 128m @@ -200,23 +200,23 @@ default_druid_configs: druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" #Druid Azure Details druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ sunbird_druid_storage_account_name }}" - azure_storage_secret: "{{ sunbird_druid_storage_account_key }}" - azure_container: "{{ druid_azure_container_name }}" + azure_account_name: "{{ cloud_public_storage_accountname 
}}" + azure_storage_secret: "{{ cloud_public_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" #Logging the indexing logs to azure - druid_log_azure_container: "{{ druid_azure_container_name }}" + druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" druid_log_azure_folder: "druidlogs" #Druid S3 Details - druid_storage_type: "{{ druid_storage_type }}" + druid_storage_type: "{{ cloud_storage_telemetry_type }}" s3_access_key: "{{ s3_storage_key }}" s3_secret_key: "{{ s3_storage_secret }}" s3_bucket: "{{ s3_storage_container }}" s3_endpoint: "{{ s3_storage_endpoint }}" s3_segment_dir: "druid/rollup/segments" - s3_path_like_access: "{{ s3_path_style_access }}" - s3_v4_sign_region: "{{ s3_default_bucket_location }}" + s3_path_like_access: "{{ cloud_storage_path_style_access }}" + s3_v4_sign_region: "{{ cloud_public_storage_region }}" #Logging the indexing logs to s3 - s3_logging_bucket: "{{ s3_storage_container }}" + s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" #Druid coordinator node configuration druid_coordinator_heap_size: 128m diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 7cfa9e7bdc..ac0cb24755 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -2,34 +2,34 @@ {% if dp_object_store_type == "azure" %} { "jars": [ - {% if cloud_storage_type == "azure" %} + {% if cloud_storage_telemetry_type == "azure" %} "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - {% elif cloud_storage_type == "s3" %} + {% elif cloud_storage_telemetry_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "s3://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - {% elif cloud_storage_type == "gcloud" %} + {% elif cloud_storage_telemetry_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "gs://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" {% endif %} ], "file": - {% if cloud_storage_type == "azure" %} + {% if cloud_storage_telemetry_type == "azure" %} "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - {% elif cloud_storage_type == "s3" %} + {% elif cloud_storage_telemetry_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - {% elif cloud_storage_type == "gcloud" %} + {% elif cloud_storage_telemetry_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", {% endif %} "files": [ - {% if cloud_storage_type == "azure" %} + {% if cloud_storage_telemetry_type == "azure" %} "wasbs://{{ bucket 
}}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/application.conf" - {% elif cloud_storage_type == "s3" %} + {% elif cloud_storage_telemetry_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", - {% elif cloud_storage_type == "gcloud" %} + {% elif cloud_storage_telemetry_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", {% endif %} ], diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index 307d357b52..5304c09927 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -13,6 +13,7 @@ storage.secret.config="{{ dp_storage_secret_config }}" reports.storage.key.config="{{ dp_reports_storage_key_config }}" reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" {% if dp_object_store_type == "azure" %} +<<<<<<< HEAD cloud_storage_type="azure" {% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3" or dp_object_store_type == "oci") %} {% if cloud_service_provider == "oci" %} @@ -23,6 +24,13 @@ cloud_storage_type="s3" cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint_with_protocol }}" storage.endpoint.config="{{ s3_storage_endpoint_with_protocol }}" +======= +cloud_storage_telemetry_type="azure" +{% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3") %} +cloud_storage_telemetry_type="s3" +cloud_public_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" +cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint }}" +>>>>>>> 72bae4d12 (common variable changes in datapipeline/flink-jobs) aws_storage_key="{{ s3_storage_key }}" aws_storage_secret="{{ s3_storage_secret }}" {% endif %} From 76bcf5d59600e834da01e6783c63d219adca2800 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 23 Dec 2022 15:48:03 +0530 Subject: [PATCH 188/203] csp migration variables update --- .../templates/job-cluster-taskmanager.yaml | 16 ++++++++-------- .../datapipeline/flink-jobs/values.j2 | 2 +- .../templates/spark-env.j2 | 16 ++++++++-------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml index ecf59608db..0c858aaca2 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml @@ -30,18 +30,18 @@ spec: "-Dtaskmanager.rpc.port=6122", "-Dtaskmanager.heap.size={{ index .Values $job-config-key.task_manager_heap_size }}", {{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", + "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net: {{ .Values.cloud_storage_secret }}", {{- end }} {{- if eq .Values.checkpoint_store_type "aws" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", - "-Ds3.path.style.access={{ 
.Values.s3_path_style_access }}", + "-Ds3.access-key={{ .Values.cloud_storage_key }}", + "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", + "-Ds3.endpoint={{ .Values.cloud_storage_endpoint }}", + "-Ds3.path.style.access={{ .Values.cloud_storage_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}", + "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}", {{- end }} "-Dconfig.file=/opt/flink/conf/{{ .Release.Name }}.conf"] ports: diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index f03a3f2459..8c6861aec5 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -10,7 +10,7 @@ checkpoint_store_type: {{ cloud_service_provider }} cloud_storage_key: {{ cloud_public_storage_accountname }} cloud_storage_secret: {{ cloud_public_storage_secret }} cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} -s3_path_style_access: {{ cloud_storage_path_style_access }} +cloud_storage_path_style_access: {{ cloud_storage_pathstyle_access }} cloud_storage_project_id: {{ cloud_public_storage_project }} telemetry-extractor: diff --git a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 index 53bf3c3888..48747a71d9 100644 --- a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 +++ b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 @@ -70,12 +70,12 @@ export SPARK_WORKER_MEMORY={{ spark.worker.memory }} export SPARK_WORKER_INSTANCES={{ spark.worker.instances }} export SPARK_EXECUTOR_MEMORY={{ spark.executor.memory }} export SPARK_PUBLIC_DNS="{{ spark.public_dns }}" -export reports_storage_key={{sunbird_private_storage_account_name}} -export reports_storage_secret={{sunbird_private_storage_account_key}} -export cloud_private_storage_accountname={{sunbird_private_storage_account_name}} -export cloud_private_storage_secret={{sunbird_private_storage_account_key}} -export druid_storage_account_key={{sunbird_public_storage_account_name}} -export druid_storage_account_secret={{sunbird_public_storage_account_key}} -export aws_storage_key={{ s3_storage_key }} -export aws_storage_secret={{ s3_storage_secret }} +export reports_storage_key={{cloud_public_storage_accountname}} +export reports_storage_secret={{cloud_public_storage_secret}} +export azure_storage_key={{cloud_private_storage_accountname}} +export azure_storage_secret={{cloud_private_storage_secret}} +export druid_storage_account_key={{cloud_public_storage_accountname}} +export druid_storage_account_secret={{cloud_public_storage_secret}} +export aws_storage_key={{ cloud_public_storage_accountname }} +export aws_storage_secret={{ cloud_public_storage_secret }} From 0a613c764d0d50276a085517c000afda14dc03a7 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 23 Dec 2022 16:03:44 +0530 Subject: [PATCH 189/203] csp migration variables update Updated oci changes --- ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml b/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml index 07c02ac1c6..08c834d1bd 100644 --- a/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml +++ b/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml @@ -2,8 +2,8 @@ analytics_user: analytics analytics_group: analytics analytics_user_home: /home/{{analytics_user}} sbin_path: "{{ analytics_user_home }}/sbin" -cloud_storage_telemetry_bucketname: "{{secor_azure_container_name}}" -cloud_private_storage_secret: "{{sunbird_private_storage_account_key}}" +azure_container_name: "{{cloud_storage_telemetry_bucketname}}" +azure_account_key: "{{cloud_private_storage_secret}}" telemetry_ingestion_topic: "{{ env }}.telemetry.ingest" From f8a9f235c4b8930d1f3eeaff81790638fb3c9464 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 26 Dec 2022 15:31:19 +0530 Subject: [PATCH 190/203] csp migration variables update --- .../templates/job-cluster-jobmanager.yaml | 2 +- .../datapipeline/flink-jobs/values.j2 | 8 +- .../roles/analytics-druid/defaults/main.yml | 323 +++++++++--------- .../templates/spark-env.j2 | 12 +- .../templates/common.conf.j2 | 2 +- 5 files changed, 174 insertions(+), 173 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index 6b25ab6c53..a8773232bd 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -36,7 +36,7 @@ spec: {{- if eq .Values.checkpoint_store_type "aws" }} "-Ds3.access-key={{ .Values.cloud_storage_key }}", "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", - "-Ds3.endpoint={{ .Values.cloud_public_endpoint }}", + "-Ds3.endpoint={{ .Values.cloud_private_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index 8c6861aec5..d7c53a44c7 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -7,11 +7,11 @@ repository: {{ datapipeline_repository|default('data-pipeline') }} image_tag: {{ image_tag }} checkpoint_store_type: {{ cloud_service_provider }} -cloud_storage_key: {{ cloud_public_storage_accountname }} -cloud_storage_secret: {{ cloud_public_storage_secret }} -cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} +cloud_storage_key: {{ cloud_private_storage_accountname }} +cloud_storage_secret: {{ cloud_private_storage_secret }} +cloud_storage_endpoint: {{ cloud_private_storage_endpoint }} cloud_storage_path_style_access: {{ cloud_storage_pathstyle_access }} -cloud_storage_project_id: {{ cloud_public_storage_project }} +cloud_storage_project_id: {{ cloud_private_storage_project }} telemetry-extractor: job_name=telemetry-extractor diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index a22a3b9c0e..ebe05ac9a2 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -20,30 +20,29 @@ druid_zookeeper_host: "{{ groups[cluster+'-zookeeper']|join(':2181,')}}:2181" 
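 # Illustrative sketch, not from the patch: the raw/rollup blocks below are
 # meant to be driven by one CSP-agnostic input set, for example
 #   cloud_storage_telemetry_type: azure
 #   cloud_private_storage_accountname: devstorage        # hypothetical value
 #   cloud_private_storage_secret: "{{ vault_secret }}"   # hypothetical lookup
 #   cloud_storage_telemetry_bucketname: telemetry-data   # hypothetical value
 # so the azure_* and s3_* keys in both profiles resolve from the same variables.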
druid_postgres_user: druid #druid_postgres_host: "{{ groups['druid-postgres'][0] }}" - druid_default_tmp_dir: "/var/tmp" druid_gc_logdir: "/var/log/druid/crash_logs" druid_crash_logdir: "/var/log/druid/crash_logs" druid_log_dir: "/var/log/druid/" -#Writing request query logs to file +#Writing request query logs to file druid_request_logging_type: "file" #Druid Extensions -druid_storage_type: {{ cloud_storage_telemetry_type }} +druid_storage_type: { { cloud_storage_telemetry_type } } -druid_extensions_list : '"druid-azure-extensions", "graphite-emitter", "postgresql-metadata-storage", "druid-kafka-indexing-service", "druid-datasketches"' +druid_extensions_list: '"druid-azure-extensions", "graphite-emitter", "postgresql-metadata-storage", "druid-kafka-indexing-service", "druid-datasketches"' druid_community_extensions: - - graphite-emitter + - graphite-emitter # End of druid_extensions -druid_indexing_logs_type: {{ cloud_storage_telemetry_type }} +druid_indexing_logs_type: { { cloud_storage_telemetry_type } } druid_indexing_log_dir: /var/druid/indexing-logs -druid_indexing_storage_type : metadata -druid_indexing_task_basedir : "/var/task" +druid_indexing_storage_type: metadata +druid_indexing_task_basedir: "/var/task" druid_common_monitors: '"com.metamx.metrics.JvmMonitor","org.apache.druid.java.util.metrics.JvmMonitor"' druid_common_emitters: '"logging","graphite"' @@ -56,14 +55,14 @@ druid_whitelist_filepath: "{{ druid_path }}whitelist" #Coordinator Configurations druid_coordinator_port: 8081 -druid_coordinator_service : druid/coordinator +druid_coordinator_service: druid/coordinator druid_coordinator_tmp_dir: "{{ druid_default_tmp_dir }}" druid_coordinator_gc_logfile: "{{ druid_crash_logdir }}/gc.log" #Overlord Configurations druid_overlord_port: 8090 -druid_overlord_service : druid/overlord +druid_overlord_service: druid/overlord druid_overlord_heap_size: 256m druid_overlord_tmp_dir: "{{ druid_default_tmp_dir }}" @@ -82,14 +81,13 @@ druid_broker_gc_logdir: "{{ druid_crash_logdir }}/gc.log" druid_historical_port: 8083 druid_historical_service: druid/historical - druid_historical_tmp_dir: "{{ druid_default_tmp_dir }}" druid_historical_gc_logfile: "{{ druid_crash_logdir }}/historical.gc.log" druid_historical_heap_dump_file: "{{ druid_crash_logdir }}/historical.hprof" -druid_broker_heap_dump_file : "{{ druid_crash_logdir }}/broker.hprof" -druid_coordinator_heap_dump_file : "{{ druid_crash_logdir }}/coordinator.hprof" -druid_overlord_heap_dump_file : "{{ druid_crash_logdir }}/overlord.hprof" -druid_mm_heap_dump_file : "{{ druid_crash_logdir }}/middlemanager.hprof" +druid_broker_heap_dump_file: "{{ druid_crash_logdir }}/broker.hprof" +druid_coordinator_heap_dump_file: "{{ druid_crash_logdir }}/coordinator.hprof" +druid_overlord_heap_dump_file: "{{ druid_crash_logdir }}/overlord.hprof" +druid_mm_heap_dump_file: "{{ druid_crash_logdir }}/middlemanager.hprof" druid_historical_monitoring_monitors: '"org.apache.druid.server.metrics.HistoricalMetricsMonitor","com.metamx.metrics.JvmMonitor"' @@ -116,152 +114,155 @@ druid_router_tmp_dir: "{{ druid_default_tmp_dir }}" druid_router_gc_logfile: "{{ druid_crash_logdir }}/gc.router.log" druid_router_heap_dump_file: "{{ druid_crash_logdir }}/router.hprof" - default_druid_configs: - raw: - #Druid Postgres Details - druid_postgres_db: "druid" - druid_postgres_host: "{{ postgres.db_url }}" - druid_postgres_port: "{{ postgres.db_port }}" - druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" - #Druid Azure Details - 
druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ cloud_public_storage_accountname }}" - azure_storage_secret: "{{ cloud_public_storage_secret }}" - azure_container: "{{ cloud_storage_telemetry_bucketname }}" - #Logging the indexing logs to azure - druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" - druid_log_azure_folder: "druidlogs" - #Druid S3 Details - druid_storage_type: "{{ cloud_storage_telemetry_type }}" - s3_access_key: "{{ cloud_public_storage_accountname }}" - s3_secret_key: "{{ cloud_public_storage_secret }}" - s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" - s3_endpoint: "{{ cloud_public_storage_endpoint }}" - s3_segment_dir: "druid/raw/segments" - s3_path_like_access: "{{ cloud_storage_path_style_access }}" - s3_v4_sign_region: "{{ cloud_public_storage_region }}" - #Logging the indexing logs to s3 - s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" - s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" - #Druid coordinator node configuration - druid_coordinator_heap_size: 128m - druid_coordinator_period: PT30S - druid_coordinator_startDelay: PT30S - druid_coordinator_balance_strategy: diskNormalized - #Druid overlord node configuration - druid_overlord_heap_size: 256m - #Druid broker node configuration - druid_broker_min_heap_size: 128m - druid_broker_max_heap_size: 128m - druid_broker_max_direct_size: 800m - druid_broker_http_numConnections: 5 - druid_broker_server_http_numThread: 25 - druid_broker_processing_bufferBytes: 134217728 - druid_broker_processing_threads: 2 - #Druid historical node configuration - druid_historical_min_heap_size: 1048m - druid_historical_max_heap_size: 1048m - druid_historical_max_direct_size: 800m - druid_historical_http_numConnections: 5 - druid_historical_server_http_numThread: 25 - druid_historical_processing_bufferBytes: 134217728 - druid_historical_processing_threads: 2 - druid_historical_enable_cache: false - druid_historical_segmentcache_size: 10000000000 - druid_historical_server_maxsize: 10000000000 - druid_historical_processing_num_merge_buffers: 2 - druid_query_ondiskstorage_enabled: true - druid_historical_maxMergingDictionarySize: 100000000 - druid_historical_segmentcache_numloadingthreads: 4 - druid_historical_segmentcache_path: "/var/segmentstore" - druid.query.groupBy.maxOnDiskStorage: 10737418240 - #Druid middlemanager configuration - druid_middlemanager_heap_size: 128m - druid_middlemanager_worker_cap: 4 - druid_mm_java_opts_array: "-server -Xmx900m -XX:+UseG1GC -XX:HeapDumpPath={{ druid_mm_heap_dump_file }} -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" - druid_middlemanager_peon_server_http_numThread: 25 - druid_middlemanager_peon_processing_bufferBytes: 25000000 - druid_middlemanager_peon_processing_threads: 2 - druid_middlemanager_peon_server_maxsize: 0 - druid_indexing_queue_startDelay: PT30S - druid_router_heap_size: 1g - druid_router_http_numConnections: 50 - druid_router_http_readTimeout: PT5M - druid_router_http_numMaxThreads: 100 - druid_server_http_numThreads: 100 - druid_router_managementProxy_enabled: true - druid_historical_maxOnDiskStorage: 10737418240 - rollup: - #Druid Postgres Details - druid_postgres_db: "druid" - druid_postgres_host: "{{ postgres.db_url }}" - druid_postgres_port: "{{ postgres.db_port }}" - druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" - #Druid Azure Details - druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ 
cloud_public_storage_accountname }}" - azure_storage_secret: "{{ cloud_public_storage_secret }}" - azure_container: "{{ cloud_storage_telemetry_bucketname }}" - #Logging the indexing logs to azure - druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" - druid_log_azure_folder: "druidlogs" - #Druid S3 Details - druid_storage_type: "{{ cloud_storage_telemetry_type }}" - s3_access_key: "{{ s3_storage_key }}" - s3_secret_key: "{{ s3_storage_secret }}" - s3_bucket: "{{ s3_storage_container }}" - s3_endpoint: "{{ s3_storage_endpoint }}" - s3_segment_dir: "druid/rollup/segments" - s3_path_like_access: "{{ cloud_storage_path_style_access }}" - s3_v4_sign_region: "{{ cloud_public_storage_region }}" - #Logging the indexing logs to s3 - s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" - s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" - #Druid coordinator node configuration - druid_coordinator_heap_size: 128m - druid_coordinator_period: PT30S - druid_coordinator_startDelay: PT30S - druid_coordinator_balance_strategy: cost - #Druid overlord node configuration - druid_overlord_heap_size: 256m - #Druid broker node configuration - druid_broker_min_heap_size: 128m - druid_broker_max_heap_size: 128m - druid_broker_max_direct_size: 700m - druid_broker_http_numConnections: 5 - druid_broker_server_http_numThread: 25 - druid_broker_processing_bufferBytes: 134217728 - druid_broker_processing_threads: 2 - #Druid historical node configuration - druid_historical_min_heap_size: 1048m - druid_historical_max_heap_size: 1048m - druid_historical_max_direct_size: 800m - druid_historical_http_numConnections: 5 - druid_historical_server_http_numThread: 25 - druid_historical_processing_bufferBytes: 134217728 - druid_historical_processing_threads: 2 - druid_historical_enable_cache: false - druid_historical_segmentcache_size: 2000000000 - druid_historical_server_maxsize: 10000000000 - druid_historical_processing_num_merge_buffers: 2 - druid_query_ondiskstorage_enabled: false - druid_historical_segmentcache_numloadingthreads: 4 - druid_historical_segmentcache_path: "/var/segmentstore" - #Druid middlemanager configuration - druid_middlemanager_heap_size: 128m - druid_middlemanager_worker_cap: 4 - druid_mm_java_opts_array: "-server -Xmx900m -XX:+UseG1GC -XX:HeapDumpPath={{ druid_mm_heap_dump_file }} -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" - druid_middlemanager_peon_server_http_numThread: 25 - druid_middlemanager_peon_processing_bufferBytes: 25000000 - druid_middlemanager_peon_processing_threads: 2 - druid_middlemanager_peon_server_maxsize: 0 - druid_indexing_queue_startDelay: PT30S - druid_router_heap_size: 1g - druid_router_http_numConnections: 50 - druid_router_http_readTimeout: PT5M - druid_router_http_numMaxThreads: 100 - druid_server_http_numThreads: 100 - druid_router_managementProxy_enabled: true + raw: + #Druid Postgres Details + druid_postgres_db: "druid" + druid_postgres_host: "{{ postgres.db_url }}" + druid_postgres_port: "{{ postgres.db_port }}" + druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" + #Druid Azure Details + druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" + azure_account_name: "{{ cloud_private_storage_accountname }}" + azure_storage_secret: "{{ cloud_private_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" + #Logging the indexing logs to azure + druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" + druid_log_azure_folder: "druidlogs" + 
#Druid S3 Details + druid_storage_type: "{{ cloud_storage_telemetry_type }}" + s3_access_key: "{{ cloud_private_storage_accountname }}" + s3_secret_key: "{{ cloud_private_storage_secret }}" + s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" + s3_endpoint: "{{ cloud_private_storage_endpoint }}" + s3_segment_dir: "druid/raw/segments" + # s3_path_like_access: "{{ cloud_storage_path_style_access }}" + # s3_v4_sign_region: "{{ cloud_public_storage_region }}" + s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" + s3_v4_sign_region: "{{ cloud_private_storage_region }}" + #Logging the indexing logs to s3 + s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" + #Druid coordinator node configuration + druid_coordinator_heap_size: 128m + druid_coordinator_period: PT30S + druid_coordinator_startDelay: PT30S + druid_coordinator_balance_strategy: diskNormalized + #Druid overlord node configuration + druid_overlord_heap_size: 256m + #Druid broker node configuration + druid_broker_min_heap_size: 128m + druid_broker_max_heap_size: 128m + druid_broker_max_direct_size: 800m + druid_broker_http_numConnections: 5 + druid_broker_server_http_numThread: 25 + druid_broker_processing_bufferBytes: 134217728 + druid_broker_processing_threads: 2 + #Druid historical node configuration + druid_historical_min_heap_size: 1048m + druid_historical_max_heap_size: 1048m + druid_historical_max_direct_size: 800m + druid_historical_http_numConnections: 5 + druid_historical_server_http_numThread: 25 + druid_historical_processing_bufferBytes: 134217728 + druid_historical_processing_threads: 2 + druid_historical_enable_cache: false + druid_historical_segmentcache_size: 10000000000 + druid_historical_server_maxsize: 10000000000 + druid_historical_processing_num_merge_buffers: 2 + druid_query_ondiskstorage_enabled: true + druid_historical_maxMergingDictionarySize: 100000000 + druid_historical_segmentcache_numloadingthreads: 4 + druid_historical_segmentcache_path: "/var/segmentstore" + druid.query.groupBy.maxOnDiskStorage: 10737418240 + #Druid middlemanager configuration + druid_middlemanager_heap_size: 128m + druid_middlemanager_worker_cap: 4 + druid_mm_java_opts_array: "-server -Xmx900m -XX:+UseG1GC -XX:HeapDumpPath={{ druid_mm_heap_dump_file }} -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" + druid_middlemanager_peon_server_http_numThread: 25 + druid_middlemanager_peon_processing_bufferBytes: 25000000 + druid_middlemanager_peon_processing_threads: 2 + druid_middlemanager_peon_server_maxsize: 0 + druid_indexing_queue_startDelay: PT30S + druid_router_heap_size: 1g + druid_router_http_numConnections: 50 + druid_router_http_readTimeout: PT5M + druid_router_http_numMaxThreads: 100 + druid_server_http_numThreads: 100 + druid_router_managementProxy_enabled: true + druid_historical_maxOnDiskStorage: 10737418240 + rollup: + #Druid Postgres Details + druid_postgres_db: "druid" + druid_postgres_host: "{{ postgres.db_url }}" + druid_postgres_port: "{{ postgres.db_port }}" + druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" + #Druid Azure Details + druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" + azure_account_name: "{{ cloud_private_storage_accountname }}" + azure_storage_secret: "{{ cloud_private_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" + #Logging the indexing logs to azure + druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" + 
druid_log_azure_folder: "druidlogs" + #Druid S3 Details + druid_storage_type: "{{ cloud_storage_telemetry_type }}" + s3_access_key: "{{ s3_storage_key }}" + s3_secret_key: "{{ s3_storage_secret }}" + s3_bucket: "{{ s3_storage_container }}" + s3_endpoint: "{{ s3_storage_endpoint }}" + s3_segment_dir: "druid/rollup/segments" + # s3_path_like_access: "{{ cloud_storage_path_style_access }}" + # s3_v4_sign_region: "{{ cloud_public_storage_region }}" + s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" + s3_v4_sign_region: "{{ cloud_private_storage_region }}" + #Logging the indexing logs to s3 + s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" + #Druid coordinator node configuration + druid_coordinator_heap_size: 128m + druid_coordinator_period: PT30S + druid_coordinator_startDelay: PT30S + druid_coordinator_balance_strategy: cost + #Druid overlord node configuration + druid_overlord_heap_size: 256m + #Druid broker node configuration + druid_broker_min_heap_size: 128m + druid_broker_max_heap_size: 128m + druid_broker_max_direct_size: 700m + druid_broker_http_numConnections: 5 + druid_broker_server_http_numThread: 25 + druid_broker_processing_bufferBytes: 134217728 + druid_broker_processing_threads: 2 + #Druid historical node configuration + druid_historical_min_heap_size: 1048m + druid_historical_max_heap_size: 1048m + druid_historical_max_direct_size: 800m + druid_historical_http_numConnections: 5 + druid_historical_server_http_numThread: 25 + druid_historical_processing_bufferBytes: 134217728 + druid_historical_processing_threads: 2 + druid_historical_enable_cache: false + druid_historical_segmentcache_size: 2000000000 + druid_historical_server_maxsize: 10000000000 + druid_historical_processing_num_merge_buffers: 2 + druid_query_ondiskstorage_enabled: false + druid_historical_segmentcache_numloadingthreads: 4 + druid_historical_segmentcache_path: "/var/segmentstore" + #Druid middlemanager configuration + druid_middlemanager_heap_size: 128m + druid_middlemanager_worker_cap: 4 + druid_mm_java_opts_array: "-server -Xmx900m -XX:+UseG1GC -XX:HeapDumpPath={{ druid_mm_heap_dump_file }} -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" + druid_middlemanager_peon_server_http_numThread: 25 + druid_middlemanager_peon_processing_bufferBytes: 25000000 + druid_middlemanager_peon_processing_threads: 2 + druid_middlemanager_peon_server_maxsize: 0 + druid_indexing_queue_startDelay: PT30S + druid_router_heap_size: 1g + druid_router_http_numConnections: 50 + druid_router_http_readTimeout: PT5M + druid_router_http_numMaxThreads: 100 + druid_server_http_numThreads: 100 + druid_router_managementProxy_enabled: true enable_druid_sql: true diff --git a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 index 48747a71d9..bb8a92abf0 100644 --- a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 +++ b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 @@ -70,12 +70,12 @@ export SPARK_WORKER_MEMORY={{ spark.worker.memory }} export SPARK_WORKER_INSTANCES={{ spark.worker.instances }} export SPARK_EXECUTOR_MEMORY={{ spark.executor.memory }} export SPARK_PUBLIC_DNS="{{ spark.public_dns }}" -export reports_storage_key={{cloud_public_storage_accountname}} -export reports_storage_secret={{cloud_public_storage_secret}} +export reports_storage_key={{cloud_private_storage_accountname}} +export 
reports_storage_secret={{cloud_private_storage_secret}} export azure_storage_key={{cloud_private_storage_accountname}} export azure_storage_secret={{cloud_private_storage_secret}} -export druid_storage_account_key={{cloud_public_storage_accountname}} -export druid_storage_account_secret={{cloud_public_storage_secret}} -export aws_storage_key={{ cloud_public_storage_accountname }} -export aws_storage_secret={{ cloud_public_storage_secret }} +export druid_storage_account_key={{cloud_private_storage_accountname}} +export druid_storage_account_secret={{cloud_private_storage_secret}} +export aws_storage_key={{ cloud_private_storage_accountname }} +export aws_storage_secret={{ cloud_private_storage_secret }} diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index 5304c09927..0e342ab71e 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -28,7 +28,7 @@ storage.endpoint.config="{{ s3_storage_endpoint_with_protocol }}" cloud_storage_telemetry_type="azure" {% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3") %} cloud_storage_telemetry_type="s3" -cloud_public_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" +cloud_private_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint }}" >>>>>>> 72bae4d12 (common variable changes in datapipeline/flink-jobs) aws_storage_key="{{ s3_storage_key }}" From a3a4b59c133d05c6ec0c9edb82b7bbdbfc39ccc9 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 10 Nov 2023 17:22:40 +0530 Subject: [PATCH 191/203] certjob-workflow update --- ansible/inventory/env/group_vars/all.yml | 8 +- ansible/lpa_data-products_deploy.yml | 7 +- .../analytics-bootstrap-spark/tasks/main.yml | 78 +++++++++++++------ .../roles/analytics-druid/defaults/main.yml | 12 +-- .../templates/create-cluster.sh.j2 | 4 +- .../defaults/main.yml | 36 +++++---- .../templates/conf/DialcodeRedisIndexer.j2 | 4 +- .../templates/cluster-config.json.j2 | 4 +- .../templates/start-jobmanager.j2 | 12 +-- .../templates/secor.azure.j2 | 6 +- .../tasks/main.yml | 24 +++--- .../defaults/main.yml | 2 +- .../templates/secor.azure.j2 | 4 +- ansible/spark-cluster-job-submit.yml | 6 +- .../roles/flink-jobs-deploy/defaults/main.yml | 4 +- .../helm_charts/druid-cluster/values.j2 | 4 +- kubernetes/helm_charts/secor/values.j2 | 4 +- 17 files changed, 128 insertions(+), 91 deletions(-) diff --git a/ansible/inventory/env/group_vars/all.yml b/ansible/inventory/env/group_vars/all.yml index 67fc9d644c..d0dc11fb5c 100644 --- a/ansible/inventory/env/group_vars/all.yml +++ b/ansible/inventory/env/group_vars/all.yml @@ -10,8 +10,8 @@ channel_data_exhaust_bucket: dev-data-store secrets_path: '{{inventory_dir}}/secrets.yml' artifacts_container: "{{dp_vault_artifacts_container}}" -report_azure_account_name: "{{sunbird_private_storage_account_name}}" -report_azure_storage_secret: "{{sunbird_private_storage_account_key}}" +report_azure_account_name: "{{cloud_private_storage_accountname}}" +report_azure_storage_secret: "{{cloud_private_storage_secret}}" redis_host: "{{ groups['redis'][0] }}" metadata_redis_host: "{{ groups['redis'][0] }}" @@ -50,8 +50,8 @@ secor: artifact_dir: /mount/secor artifact_ver: "0.29" azure: - account_name: "{{sunbird_private_storage_account_name}}" - account_key: 
"{{sunbird_private_storage_account_key}}" + account_name: "{{cloud_private_storage_accountname}}" + account_key: "{{cloud_private_storage_secret}}" container_name: "{{channel_data_exhaust_bucket}}" paths: ['/mount/secor', '/mount/secor/reports', '/mount/secor/logs', '/home/analytics/sbin', '/mount/data/analytics'] channel: "{{secor_alerts_slack_channel}}" diff --git a/ansible/lpa_data-products_deploy.yml b/ansible/lpa_data-products_deploy.yml index 1ff0cbdabc..903afcc7b3 100644 --- a/ansible/lpa_data-products_deploy.yml +++ b/ansible/lpa_data-products_deploy.yml @@ -5,8 +5,7 @@ become: yes become_user: "{{ analytics_user }}" environment: - AZURE_STORAGE_ACCOUNT: "{{sunbird_private_storage_account_name}}" - AZURE_STORAGE_KEY: "{{sunbird_private_storage_account_key}}" - OCI_CLI_AUTH: "instance_principal" + AZURE_STORAGE_ACCOUNT: "{{cloud_private_storage_accountname}}" + AZURE_STORAGE_KEY: "{{cloud_private_storage_secret}}" roles: - - data-products-deploy + - data-products-deploy diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml index 59fc11c8b0..4d7fa8ef7c 100644 --- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml +++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml @@ -4,44 +4,76 @@ become: yes become_user: "{{ analytics_user }}" lineinfile: - path: '{{ analytics_user_home }}/.bashrc' - line: 'export {{item.var}}={{item.value}}' + path: "{{ analytics_user_home }}/.bashrc" + line: "export {{item.var}}={{item.value}}" regexp: "export {{ item.var }}.*" with_items: - - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'} - - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'} - + - { + var: "azure_storage_key", + value: "{{ cloud_private_storage_accountname }}", + } + - { + var: "azure_storage_secret", + value: "{{ cloud_private_storage_secret }}", + } - name: Adding PATH for oci cli Vars to bashrc file of spark. become: yes become_user: "{{ analytics_user }}" lineinfile: - path: '{{ analytics_user_home }}/.bashrc' - line: 'export PATH={{ analytics_user_home }}/bin:$PATH' + path: "{{ analytics_user_home }}/.bashrc" + line: "export PATH={{ analytics_user_home }}/bin:$PATH" regexp: "export PATH={{ analytics_user_home }}/bin.*" when: cloud_service_provider == "oci" - name: Adding ENV Vars to spark servers environment. 
become: yes lineinfile: - path: '/etc/environment' - line: '{{item.var}}={{item.value}}' + path: "/etc/environment" + line: "{{item.var}}={{item.value}}" regexp: "{{ item.var }}.*" with_items: - - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'} - - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'} - - {var: 'AZURE_STORAGE_ACCOUNT', value: '{{ sunbird_private_storage_account_name }}'} - - {var: 'AZURE_STORAGE_ACCESS_KEY', value: '{{ sunbird_private_storage_account_key }}'} - - {var: 'PUBLIC_AZURE_STORAGE_ACCOUNT', value: '{{ sunbird_public_storage_account_name }}'} - - {var: 'PUBLIC_AZURE_STORAGE_ACCESS_KEY', value: '{{ sunbird_public_storage_account_key }}'} - - {var: 'PRIVATE_REPORT_CONTAINER', value: '{{ sunbird_private_azure_report_container_name }}'} - - {var: 'PUBLIC_REPORT_CONTAINER', value: '{{ sunbird_public_azure_report_container_name }}'} - - {var: 'REPORT_BACKUP_CONTAINER', value: 'portal-reports-backup'} - - {var: 'GOOGLE_CREDENTIALS_PATH', value: '/home/analytics/credentials'} - - {var: 'STORAGE_PROVIDER', value: 'AZURE'} - - {var: 'ENV', value: '{{env}}'} - - {var: 'KAFKA_BROKER_HOST', value: "{{groups['processing-cluster-kafka'][0]}}:9092"} - - {var: 'OCI_CLI_AUTH', value: "instance_principal"} + - { + var: "azure_storage_key", + value: "{{ cloud_private_storage_accountname }}", + } + - { + var: "azure_storage_secret", + value: "{{ cloud_private_storage_secret }}", + } + - { + var: "AZURE_STORAGE_ACCOUNT", + value: "{{ cloud_private_storage_accountname }}", + } + - { + var: "AZURE_STORAGE_ACCESS_KEY", + value: "{{ cloud_private_storage_secret }}", + } + - { + var: "PUBLIC_AZURE_STORAGE_ACCOUNT", + value: "{{ cloud_public_storage_accountname }}", + } + - { + var: "PUBLIC_AZURE_STORAGE_ACCESS_KEY", + value: "{{ cloud_public_storage_secret }}", + } + - { + var: "PRIVATE_REPORT_CONTAINER", + value: "{{ cloud_storage_privatereports_bucketname }}", + } + - { + var: "PUBLIC_REPORT_CONTAINER", + value: "{{ cloud_storage_publicreports_bucketname }}", + } + - { var: "REPORT_BACKUP_CONTAINER", value: "portal-reports-backup" } + - { var: "GOOGLE_CREDENTIALS_PATH", value: "/home/analytics/credentials" } + - { var: "STORAGE_PROVIDER", value: "AZURE" } + - { var: "ENV", value: "{{env}}" } + - { + var: "KAFKA_BROKER_HOST", + value: "{{groups['processing-cluster-kafka'][0]}}:9092", + } + - { var: "OCI_CLI_AUTH", value: "instance_principal" } - name: Install required python packages become: yes diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index ebe05ac9a2..c1c9ba5b90 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -136,10 +136,8 @@ default_druid_configs: s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_endpoint: "{{ cloud_private_storage_endpoint }}" s3_segment_dir: "druid/raw/segments" - # s3_path_like_access: "{{ cloud_storage_path_style_access }}" - # s3_v4_sign_region: "{{ cloud_public_storage_region }}" - s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" - s3_v4_sign_region: "{{ cloud_private_storage_region }}" + s3_path_like_access: "{{ cloud_storage_path_style_access }}" + s3_v4_sign_region: "{{ cloud_public_storage_region }}" #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" @@ -212,10 +210,8 @@ default_druid_configs: s3_bucket: "{{ s3_storage_container }}" s3_endpoint: "{{ 
s3_storage_endpoint }}" s3_segment_dir: "druid/rollup/segments" - # s3_path_like_access: "{{ cloud_storage_path_style_access }}" - # s3_v4_sign_region: "{{ cloud_public_storage_region }}" - s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" - s3_v4_sign_region: "{{ cloud_private_storage_region }}" + s3_path_like_access: "{{ cloud_storage_path_style_access }}" + s3_v4_sign_region: "{{ cloud_public_storage_region }}" #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" diff --git a/ansible/roles/azure-hdinsight-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/azure-hdinsight-spark-cluster/templates/create-cluster.sh.j2 index 8a7703c936..76b4c45585 100644 --- a/ansible/roles/azure-hdinsight-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/azure-hdinsight-spark-cluster/templates/create-cluster.sh.j2 @@ -8,8 +8,8 @@ headnode_size="{{headnode_size}}" location="{{location}}" http_user=admin http_password="{{azure_spark_cluster_http_password}}" -storage_account_name="{{sunbird_private_storage_account_name}}" -storage_account_key="{{sunbird_private_storage_account_key}}" +storage_account_name="{{cloud_private_storage_accountname}}" +storage_account_key="{{cloud_private_storage_secret}}" storage_container="{{spark_storage_container}}" subnet_name="{{subnet_name}}" vnet_name="{{vnet_name}}" diff --git a/ansible/roles/content-snapshot-indexer/defaults/main.yml b/ansible/roles/content-snapshot-indexer/defaults/main.yml index d6c3b6b6aa..436161e9c7 100644 --- a/ansible/roles/content-snapshot-indexer/defaults/main.yml +++ b/ansible/roles/content-snapshot-indexer/defaults/main.yml @@ -46,9 +46,8 @@ cloud_storage: container: "telemetry-data-store" # Container is different in all env so override this. object_key: "druid-content-snapshot/snapshot.txt" provider: "azure" - account_name: "{{sunbird_public_storage_account_name}}" - account_key: "{{sunbird_public_storage_account_key}}" - account_endpoint: "{{sunbird_public_storage_account_endpoint}}" + account_name: "{{cloud_public_storage_accountname}}" + account_key: "{{cloud_public_storage_secret}}" cassandra: host: "{{lp_cassandra_host}}" ## LMS-Cassandra IP Address. 
@@ -63,7 +62,7 @@ cassandra: new_table: "device_profile_temp" druid: - coordinator_host: "{{ groups['rollup-coordinator'][0] | default(groups['raw-coordinator'][0]) }}" + coordinator_host: "{{ groups['rollup-coordinator'][0] | default(groups['raw-coordinator'][0]) }}" data_source: "content-model-snapshot" ingestion_spec_path: "{{ content_snapshot_jar_path }}/etl-jobs-1.0/druid_models/content_index_batch.json" @@ -75,14 +74,21 @@ job_config: cassandra_redis_path: "{{ content_snapshot_path }}/config/cassandraRedis.conf" output_file_path: "{{ content_snapshot_path }}/compositeSearchBatchData" -config_files: ["application","cassandraRedis","DeviceProfile","ESCloudUploader","ESContentIndexer","DialcodeRedisIndexer"] -script_files: ["DeviceProfileScripts","DruidContentIndexer","RedisContentIndexer","RedisDialcodeIndexer","RedisUserDataIndexer","run-script"] - - - - - - - - - +config_files: + [ + "application", + "cassandraRedis", + "DeviceProfile", + "ESCloudUploader", + "ESContentIndexer", + "DialcodeRedisIndexer", + ] +script_files: + [ + "DeviceProfileScripts", + "DruidContentIndexer", + "RedisContentIndexer", + "RedisDialcodeIndexer", + "RedisUserDataIndexer", + "run-script", + ] diff --git a/ansible/roles/content-snapshot-indexer/templates/conf/DialcodeRedisIndexer.j2 b/ansible/roles/content-snapshot-indexer/templates/conf/DialcodeRedisIndexer.j2 index 337759f5d1..0aafd7cb74 100644 --- a/ansible/roles/content-snapshot-indexer/templates/conf/DialcodeRedisIndexer.j2 +++ b/ansible/roles/content-snapshot-indexer/templates/conf/DialcodeRedisIndexer.j2 @@ -10,7 +10,7 @@ redis.max.pipeline.size="{{ redis.max_pipeline_size }}" redis.dialcode.database.index=6 -cloudStorage.accountName="{{sunbird_private_storage_account_name}}" -cloudStorage.accountKey="{{sunbird_private_storage_account_key}}" +cloudStorage.accountName="{{cloud_private_storage_accountname}}" +cloudStorage.accountKey="{{cloud_private_storage_secret}}" cloudStorage.container="{{ bucket | default('telemetry-data-store') }}" cloudStorage.dialCodeDataFile="dialcode-data/dial_code.csv" diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index ac0cb24755..9795b9a839 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -46,8 +46,8 @@ "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name 
}} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ cloud_private_storage_secret }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{cloud_private_storage_secret}} -Ddruid_storage_account_key={{ cloud_public_storage_accountname }} -Ddruid_storage_account_secret={{cloud_public_storage_secret}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ cloud_private_storage_secret }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{cloud_private_storage_secret}} -Ddruid_storage_account_key={{ cloud_public_storage_accountname }} -Ddruid_storage_account_secret={{cloud_public_storage_secret}}" } } {% elif (dp_object_store_type == "s3") %} diff --git a/ansible/roles/data-products-deploy/templates/start-jobmanager.j2 b/ansible/roles/data-products-deploy/templates/start-jobmanager.j2 index 2e613b9866..a3a156cfda 100644 --- a/ansible/roles/data-products-deploy/templates/start-jobmanager.j2 +++ b/ansible/roles/data-products-deploy/templates/start-jobmanager.j2 @@ -5,12 +5,12 @@ export DP_LOGS={{ analytics.home }}/logs/data-products export SERVICE_LOGS={{ analytics.home }}/logs/services export JM_HOME={{ analytics.home }}/job-manager -export azure_storage_key={{sunbird_private_storage_account_name}} -export azure_storage_secret={{sunbird_private_storage_account_key}} -export 
reports_azure_storage_key={{sunbird_private_storage_account_name}} -export reports_azure_storage_secret={{sunbird_private_storage_account_key}} -export druid_storage_account_key={{sunbird_public_storage_account_name}} -export druid_storage_account_secret={{sunbird_public_storage_account_key}} +export azure_storage_key={{cloud_private_storage_accountname}} +export azure_storage_secret={{cloud_private_storage_secret}} +export reports_azure_storage_key={{cloud_private_storage_accountname}} +export reports_azure_storage_secret={{cloud_private_storage_secret}} +export druid_storage_account_key={{cloud_public_storage_accountname}} +export druid_storage_account_secret={{cloud_public_storage_secret}} export heap_conf_str={{ spark.heap_conf_str }} today=$(date "+%Y-%m-%d") diff --git a/ansible/roles/lpa-telemetry-backup-deploy/templates/secor.azure.j2 b/ansible/roles/lpa-telemetry-backup-deploy/templates/secor.azure.j2 index f44174563c..b2acc84e37 100644 --- a/ansible/roles/lpa-telemetry-backup-deploy/templates/secor.azure.j2 +++ b/ansible/roles/lpa-telemetry-backup-deploy/templates/secor.azure.j2 @@ -12,12 +12,12 @@ secor.azure.endpoints.protocol=https # Microsoft Azure authentication credentials. # https://azure.microsoft.com/en-us/documentation/articles/storage-create-storage-account -secor.azure.account.name={{sunbird_private_storage_account_name}} -secor.azure.account.key={{sunbird_private_storage_account_key}} +secor.azure.account.name={{cloud_private_storage_accountname}} +secor.azure.account.key={{cloud_private_storage_secret}} # Microsoft Azure blob storage container name. Container is a grouping of a set # of blobs. https://msdn.microsoft.com/en-us/library/dd135715.aspx -secor.azure.container.name={{ azure_container_name }} +secor.azure.container.name={{ cloud_storage_telemetry_bucketname }} # Microsoft Azure blob storage path where files are stored within the container. 
secor.azure.path={{ secor_service_name[item[0]].base_path }} diff --git a/ansible/roles/provision-azure-spark-cluster/tasks/main.yml b/ansible/roles/provision-azure-spark-cluster/tasks/main.yml index c126dc8f3b..2f436ac4c2 100644 --- a/ansible/roles/provision-azure-spark-cluster/tasks/main.yml +++ b/ansible/roles/provision-azure-spark-cluster/tasks/main.yml @@ -1,11 +1,17 @@ - name: Adding azure blob variable to spark env file lineinfile: path: "{{spark_folder}}/conf/spark-env.sh" - line: '{{item.var}}={{item.value}}' + line: "{{item.var}}={{item.value}}" regexp: "{{ item.var }}.*" with_items: - - {var: 'azure_storage_key', value: '{{ azure_private_storage_account_name }}'} - - {var: 'azure_storage_secret', value: '{{ azure_private_storage_account_key }}'} + - { + var: "azure_storage_key", + value: "{{ cloud_private_storage_accountname }}", + } + - { + var: "azure_storage_secret", + value: "{{ cloud_private_storage_secret }}", + } no_log: true when: cloud_service_provider == "azure" @@ -15,18 +21,18 @@ path: "{{ spark_folder }}/jars/{{item.var}}-{{item.value}}.jar" state: absent with_items: - - {var: 'guava', value: '{{ guava_default_jre_version }}'} - - {var: 'guice', value: '{{ guice_default_version }}'} - - {var: 'guice-servlet', value: '{{ guice_default_version }}'} - + - { var: "guava", value: "{{ guava_default_jre_version }}" } + - { var: "guice", value: "{{ guice_default_version }}" } + - { var: "guice-servlet", value: "{{ guice_default_version }}" } + - name: Download guava_jre_url and copy to Spark jars folder become: yes get_url: url={{ guava_jre_url }} dest={{ spark_folder }}/jars/guava-{{guava_jre_version}}.jar timeout=1000 force=no - + - name: Download log4j api and copy to Spark jars folder become: yes get_url: url={{ log4j_api_url }} dest={{ spark_folder }}/jars/log4j-api-{{log4j_version}}.jar timeout=1000 force=no - + - name: Download log4j core and copy to Spark jars folder become: yes get_url: url={{ log4j_core_url }} dest={{ spark_folder }}/jars/log4j-core-{{log4j_version}}.jar timeout=1000 force=no diff --git a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml index a9edd50a3c..2eb76e196c 100644 --- a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml +++ b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml @@ -3,7 +3,7 @@ analytics_group: analytics analytics_user_home: /home/{{analytics_user}} sbin_path: "{{ analytics_user_home }}/sbin" azure_container_name: "{{secor_azure_container_name}}" -azure_account_key: "{{sunbird_private_storage_account_key}}" +azure_account_key: "{{cloud_private_storage_secret}}" telemetry_ingestion_topic: "{{ env }}.telemetry.ingest" diff --git a/ansible/roles/secor-telemetry-backup-deploy/templates/secor.azure.j2 b/ansible/roles/secor-telemetry-backup-deploy/templates/secor.azure.j2 index f44174563c..e55aa469c0 100644 --- a/ansible/roles/secor-telemetry-backup-deploy/templates/secor.azure.j2 +++ b/ansible/roles/secor-telemetry-backup-deploy/templates/secor.azure.j2 @@ -12,8 +12,8 @@ secor.azure.endpoints.protocol=https # Microsoft Azure authentication credentials. 
# https://azure.microsoft.com/en-us/documentation/articles/storage-create-storage-account -secor.azure.account.name={{sunbird_private_storage_account_name}} -secor.azure.account.key={{sunbird_private_storage_account_key}} +secor.azure.account.name={{cloud_private_storage_accountname}} +secor.azure.account.key={{cloud_private_storage_secret}} # Microsoft Azure blob storage container name. Container is a grouping of a set # of blobs. https://msdn.microsoft.com/en-us/library/dd135715.aspx diff --git a/ansible/spark-cluster-job-submit.yml b/ansible/spark-cluster-job-submit.yml index 8924fce8f2..f21aa57860 100644 --- a/ansible/spark-cluster-job-submit.yml +++ b/ansible/spark-cluster-job-submit.yml @@ -4,9 +4,7 @@ vars_files: - "{{inventory_dir}}/secrets.yml" environment: - AZURE_STORAGE_ACCOUNT: "{{sunbird_private_storage_account_name}}" - AZURE_STORAGE_KEY: "{{sunbird_private_storage_account_key}}" - OCI_CLI_AUTH: instance_principal + AZURE_STORAGE_ACCOUNT: "{{cloud_private_storage_accountname}}" + AZURE_STORAGE_KEY: "{{cloud_private_storage_secret}}" roles: - data-products-deploy - diff --git a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml index 633026b825..e371c56817 100644 --- a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml +++ b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml @@ -30,8 +30,8 @@ taskmanager_heap_memory: 1024 ### base-config related vars postgres_max_connections: 2 -azure_account: "{{ sunbird_private_storage_account_name }}" -azure_secret: "{{ sunbird_private_storage_account_key }}" +azure_account: "{{ cloud_private_storage_accountname }}" +azure_secret: "{{ cloud_private_storage_secret }}" flink_container_name: dev-data-store flink_dp_storage_container: "" checkpoint_store_type: azure diff --git a/kubernetes/helm_charts/druid-cluster/values.j2 b/kubernetes/helm_charts/druid-cluster/values.j2 index 169ef07b8e..bacc6bc321 100644 --- a/kubernetes/helm_charts/druid-cluster/values.j2 +++ b/kubernetes/helm_charts/druid-cluster/values.j2 @@ -24,8 +24,8 @@ druid_metadata_storage_connector_password: {{ druid_configs[cluster_type].druid_ # Druid Storage Type druid_deepstorage_type: azure -druid_azure_storage_account: "{{ sunbird_private_storage_account_name }}" -druid_azure_storage_account_key: "{{ sunbird_private_storage_account_key }}" +druid_azure_storage_account: "{{ cloud_private_storage_accountname }}" +druid_azure_storage_account_key: "{{ cloud_private_storage_secret }}" druid_azure_container: "{{ druid_configs[cluster_type].azure_container }}" # Indexing service logs diff --git a/kubernetes/helm_charts/secor/values.j2 b/kubernetes/helm_charts/secor/values.j2 index 183bce6c8e..7d9aa8ecec 100644 --- a/kubernetes/helm_charts/secor/values.j2 +++ b/kubernetes/helm_charts/secor/values.j2 @@ -1,5 +1,5 @@ -azure_account: "{{ sunbird_private_storage_account_name }}" -azure_secret: "{{ sunbird_private_storage_account_key }}" +azure_account: "{{ cloud_private_storage_accountname }}" +azure_secret: "{{ cloud_private_storage_secret }}" azure_container_name: "telemetry-data-store" s3_access_key: "{{s3_storage_key}}" From d797623ec26e1226d3233e12440d512075e637eb Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 10 Nov 2023 17:24:04 +0530 Subject: [PATCH 192/203] oci changes merge update --- ansible/roles/analytics-druid/defaults/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/analytics-druid/defaults/main.yml 
b/ansible/roles/analytics-druid/defaults/main.yml index c1c9ba5b90..1f49f009b8 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -30,7 +30,7 @@ druid_request_logging_type: "file" #Druid Extensions -druid_storage_type: { { cloud_storage_telemetry_type } } +druid_storage_type: "{{ cloud_storage_telemetry_type }}" druid_extensions_list: '"druid-azure-extensions", "graphite-emitter", "postgresql-metadata-storage", "druid-kafka-indexing-service", "druid-datasketches"' @@ -39,7 +39,7 @@ druid_community_extensions: # End of druid_extensions -druid_indexing_logs_type: { { cloud_storage_telemetry_type } } +druid_indexing_logs_type: "{{ cloud_storage_telemetry_type }}" druid_indexing_log_dir: /var/druid/indexing-logs druid_indexing_storage_type: metadata druid_indexing_task_basedir: "/var/task" From 4f06f64e702a9750984f49b9d7c3bf58394a20cf Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 30 Dec 2022 13:37:50 +0530 Subject: [PATCH 193/203] csp migration variables update --- ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml index 2eb76e196c..f31781473e 100644 --- a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml +++ b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml @@ -2,7 +2,7 @@ analytics_user: analytics analytics_group: analytics analytics_user_home: /home/{{analytics_user}} sbin_path: "{{ analytics_user_home }}/sbin" -azure_container_name: "{{secor_azure_container_name}}" +azure_container_name: "{{cloud_storage_telemetry_bucketname}}" azure_account_key: "{{cloud_private_storage_secret}}" telemetry_ingestion_topic: "{{ env }}.telemetry.ingest" From 3d2cb950cef701f888a7d4f3ea4b858b85be5ea1 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Tue, 17 Jan 2023 15:56:57 +0530 Subject: [PATCH 194/203] csp migration variables update --- .../roles/analytics-druid/defaults/main.yml | 162 ++++++++++++++++++ .../_common/common.runtime.properties | 7 + .../templates/flink_job_deployment.yaml | 4 +- .../helm_charts/datapipeline_jobs/values.j2 | 8 +- 4 files changed, 178 insertions(+), 3 deletions(-) diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index 1f49f009b8..8e55369885 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -115,6 +115,7 @@ druid_router_gc_logfile: "{{ druid_crash_logdir }}/gc.router.log" druid_router_heap_dump_file: "{{ druid_crash_logdir }}/router.hprof" default_druid_configs: +<<<<<<< HEAD raw: #Druid Postgres Details druid_postgres_db: "druid" @@ -260,5 +261,166 @@ default_druid_configs: druid_router_http_numMaxThreads: 100 druid_server_http_numThreads: 100 druid_router_managementProxy_enabled: true +======= + raw: + #Druid Postgres Details + druid_postgres_db: "druid" + druid_postgres_host: "{{ postgres.db_url }}" + druid_postgres_port: "{{ postgres.db_port }}" + druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" + #Druid Azure Details + druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" + azure_account_name: "{{ cloud_private_storage_accountname }}" + azure_storage_secret: "{{ cloud_private_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" + #Logging the indexing logs to azure + 
druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" + druid_log_azure_folder: "druidlogs" + #Druid S3 Details + druid_storage_type: "{{ cloud_storage_telemetry_type }}" + s3_access_key: "{{ cloud_private_storage_accountname }}" + s3_secret_key: "{{ cloud_private_storage_secret }}" + s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" + s3_endpoint: "{{ cloud_private_storage_endpoint }}" + s3_segment_dir: "druid/raw/segments" + s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" + s3_v4_sign_region: "{{ cloud_private_storage_region }}" + #Logging the indexing logs to s3 + s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" + #Druid GCS Details + gcloud_bucket: "{{ cloud_storage_telemetry_bucketname }}" + gcloud_prefix: "" + gcloud_max_list_length: 1024 + #Logging the indexing logs to GCS + gcloud_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + gcloud_indexer_logs_dir: "druid/raw/stage/indexing_logs" + #Druid coordinator node configuration + druid_coordinator_heap_size: 128m + druid_coordinator_period: PT30S + druid_coordinator_startDelay: PT30S + druid_coordinator_balance_strategy: diskNormalized + #Druid overlord node configuration + druid_overlord_heap_size: 256m + #Druid broker node configuration + druid_broker_min_heap_size: 128m + druid_broker_max_heap_size: 128m + druid_broker_max_direct_size: 800m + druid_broker_http_numConnections: 5 + druid_broker_server_http_numThread: 25 + druid_broker_processing_bufferBytes: 134217728 + druid_broker_processing_threads: 2 + #Druid historical node configuration + druid_historical_min_heap_size: 1048m + druid_historical_max_heap_size: 1048m + druid_historical_max_direct_size: 800m + druid_historical_http_numConnections: 5 + druid_historical_server_http_numThread: 25 + druid_historical_processing_bufferBytes: 134217728 + druid_historical_processing_threads: 2 + druid_historical_enable_cache: false + druid_historical_segmentcache_size: 10000000000 + druid_historical_server_maxsize: 10000000000 + druid_historical_processing_num_merge_buffers: 2 + druid_query_ondiskstorage_enabled: true + druid_historical_maxMergingDictionarySize: 100000000 + druid_historical_segmentcache_numloadingthreads: 4 + druid_historical_segmentcache_path: "/var/segmentstore" + druid.query.groupBy.maxOnDiskStorage: 10737418240 + #Druid middlemanager configuration + druid_middlemanager_heap_size: 128m + druid_middlemanager_worker_cap: 4 + druid_mm_java_opts_array: "-server -Xmx900m -XX:+UseG1GC -XX:HeapDumpPath={{ druid_mm_heap_dump_file }} -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" + druid_middlemanager_peon_server_http_numThread: 25 + druid_middlemanager_peon_processing_bufferBytes: 25000000 + druid_middlemanager_peon_processing_threads: 2 + druid_middlemanager_peon_server_maxsize: 0 + druid_indexing_queue_startDelay: PT30S + druid_router_heap_size: 1g + druid_router_http_numConnections: 50 + druid_router_http_readTimeout: PT5M + druid_router_http_numMaxThreads: 100 + druid_server_http_numThreads: 100 + druid_router_managementProxy_enabled: true + druid_historical_maxOnDiskStorage: 10737418240 + rollup: + #Druid Postgres Details + druid_postgres_db: "druid" + druid_postgres_host: "{{ postgres.db_url }}" + druid_postgres_port: "{{ postgres.db_port }}" + druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" + #Druid Azure Details + druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" + azure_account_name: "{{ 
cloud_private_storage_accountname }}" + azure_storage_secret: "{{ cloud_private_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" + #Logging the indexing logs to azure + druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" + druid_log_azure_folder: "druidlogs" + #Druid S3 Details + druid_storage_type: "{{ cloud_storage_telemetry_type }}" + s3_access_key: "{{ s3_storage_key }}" + s3_secret_key: "{{ s3_storage_secret }}" + s3_bucket: "{{ s3_storage_container }}" + s3_endpoint: "{{ s3_storage_endpoint }}" + s3_segment_dir: "druid/rollup/segments" + s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" + s3_v4_sign_region: "{{ cloud_private_storage_region }}" + #Logging the indexing logs to s3 + s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" + #Druid GCS Details + druid.google.bucket: "{{ cloud_storage_telemetry_bucketname }}" + druid.google.prefix: "" + druid.google.maxListingLength: 1024 + #Logging the indexing logs to gcs + gcloud_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + gcloud_indexer_logs_dir: "druid/rollup/stage/indexing_logs" + #Druid coordinator node configuration + druid_coordinator_heap_size: 128m + druid_coordinator_period: PT30S + druid_coordinator_startDelay: PT30S + druid_coordinator_balance_strategy: cost + #Druid overlord node configuration + druid_overlord_heap_size: 256m + #Druid broker node configuration + druid_broker_min_heap_size: 128m + druid_broker_max_heap_size: 128m + druid_broker_max_direct_size: 700m + druid_broker_http_numConnections: 5 + druid_broker_server_http_numThread: 25 + druid_broker_processing_bufferBytes: 134217728 + druid_broker_processing_threads: 2 + #Druid historical node configuration + druid_historical_min_heap_size: 1048m + druid_historical_max_heap_size: 1048m + druid_historical_max_direct_size: 800m + druid_historical_http_numConnections: 5 + druid_historical_server_http_numThread: 25 + druid_historical_processing_bufferBytes: 134217728 + druid_historical_processing_threads: 2 + druid_historical_enable_cache: false + druid_historical_segmentcache_size: 2000000000 + druid_historical_server_maxsize: 10000000000 + druid_historical_processing_num_merge_buffers: 2 + druid_query_ondiskstorage_enabled: false + druid_historical_segmentcache_numloadingthreads: 4 + druid_historical_segmentcache_path: "/var/segmentstore" + #Druid middlemanager configuration + druid_middlemanager_heap_size: 128m + druid_middlemanager_worker_cap: 4 + druid_mm_java_opts_array: "-server -Xmx900m -XX:+UseG1GC -XX:HeapDumpPath={{ druid_mm_heap_dump_file }} -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" + druid_middlemanager_peon_server_http_numThread: 25 + druid_middlemanager_peon_processing_bufferBytes: 25000000 + druid_middlemanager_peon_processing_threads: 2 + druid_middlemanager_peon_server_maxsize: 0 + druid_indexing_queue_startDelay: PT30S + druid_router_heap_size: 1g + druid_router_http_numConnections: 50 + druid_router_http_readTimeout: PT5M + druid_router_http_numMaxThreads: 100 + druid_server_http_numThreads: 100 + druid_router_managementProxy_enabled: true +>>>>>>> 5faa25b0c (csp migration variables update) enable_druid_sql: true diff --git a/ansible/roles/analytics-druid/templates/_common/common.runtime.properties b/ansible/roles/analytics-druid/templates/_common/common.runtime.properties index 8a047b09ff..be217e65c8 100644 --- a/ansible/roles/analytics-druid/templates/_common/common.runtime.properties 
+++ b/ansible/roles/analytics-druid/templates/_common/common.runtime.properties @@ -70,6 +70,10 @@ druid.s3.endpoint.signingRegion={{ druid_configs[cluster].s3_v4_sign_region }} # druid.storage.sse.type=s3 # uncomment to disable chunk encoding # druid.s3.disableChunkedEncoding=true +{% elif druid_storage_type == "google" %} +druid.google.bucket={{ druid_configs[cluster].gcloud_bucket }} +druid.google.prefix={{ druid_configs[cluster].gcloud_prefix }} +druid.google.maxListingLength={{ druid_configs[cluster].gcloud_max_list_length }} {% endif %} # Indexing service logs @@ -83,6 +87,9 @@ druid.indexer.logs.prefix= {{ druid_configs[cluster].druid_log_azure_folder }} druid.indexer.logs.s3Bucket={{ druid_configs[cluster].s3_logging_bucket }} # path to logs within the bucker druid.indexer.logs.s3Prefix={{ druid_configs[cluster].s3_indexer_logs_dir }} +{% elif druid_indexing_logs_type == "google" %} +druid.indexer.logs.bucket={{ druid_configs[cluster].gcloud_logging_bucket }} +druid.indexer.logs.prefix={{ druid_configs[cluster].gcloud_indexer_logs_dir }} {% endif %} # Service discovery diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index fb3cb0dee8..35d9066161 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -122,7 +122,7 @@ spec: {{- if eq .Values.checkpoint_store_type "gcloud" }} "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", - "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}", {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", @@ -208,7 +208,7 @@ spec: {{- if eq .Values.checkpoint_store_type "gcloud" }} "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", - "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}", {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 6d2a10f88e..3049d6bdc8 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -154,7 +154,7 @@ base_config: | statebackend { blob { storage { - account = "{% if checkpoint_store_type == "azure" %}{{ azure_account }}.blob.core.windows.net{% elif checkpoint_store_type == "s3" %}{{ flink_dp_storage_container }}{% endif %}" + account = "{% if checkpoint_store_type == "azure" %}{{ azure_account }}.blob.core.windows.net{% elif checkpoint_store_type == "s3" %}{{ flink_dp_storage_container }}{% elif checkpoint_store_type == "gcloud" %}{{ flink_dp_storage_container }}{% endif %}" container = "{{ flink_container_name }}" checkpointing.dir = "checkpoint" } @@ -162,8 +162,14 @@ base_config: | {% if checkpoint_store_type == "azure" %} base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} +<<<<<<< HEAD # base.url = 
"s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} base.url = "s3://"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} +======= + base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} +{% elif checkpoint_store_type == "gcloud" %} + base.url = "gs://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} +>>>>>>> 5faa25b0c (csp migration variables update) {% endif %} } } From e2101ac1ee1df568e9deec199906bd4e1a2880bb Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Wed, 18 Jan 2023 12:12:08 +0530 Subject: [PATCH 195/203] csp migration variables update --- kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml index e371c56817..9bb98bbf4c 100644 --- a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml +++ b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml @@ -1,6 +1,6 @@ flink_namespace: "flink-{{ env }}" imagepullsecrets: "{{ env }}-registry-secret" -service_monitor_enabled: true +service_monitor_enabled: false ### Job manager related vars jobmanager_rpc_port: 6123 jobmanager_blob_port: 6124 From 1b5ebb804599c56f4f73fd265b513abe3ae8d945 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Wed, 18 Jan 2023 12:18:42 +0530 Subject: [PATCH 196/203] csp migration variables update --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 3049d6bdc8..069bdb9560 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -155,7 +155,7 @@ base_config: | blob { storage { account = "{% if checkpoint_store_type == "azure" %}{{ azure_account }}.blob.core.windows.net{% elif checkpoint_store_type == "s3" %}{{ flink_dp_storage_container }}{% elif checkpoint_store_type == "gcloud" %}{{ flink_dp_storage_container }}{% endif %}" - container = "{{ flink_container_name }}" + container = "{{ cloud_storage_flink_bucketname }}" checkpointing.dir = "checkpoint" } } From 7874a2e1ed987584a436558be3cfdd331d0a92aa Mon Sep 17 00:00:00 2001 From: Sadanand <100120230+SadanandGowda@users.noreply.github.com> Date: Wed, 18 Jan 2023 13:04:24 +0530 Subject: [PATCH 197/203] Update main.yml --- kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml index 9bb98bbf4c..43b7aac364 100644 --- a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml +++ b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml @@ -34,7 +34,7 @@ azure_account: "{{ cloud_private_storage_accountname }}" azure_secret: "{{ cloud_private_storage_secret }}" flink_container_name: dev-data-store flink_dp_storage_container: "" -checkpoint_store_type: azure +checkpoint_store_type: gcloud checkpoint_interval: 60000 checkpoint_pause_between_seconds: 5000 checkpoint_compression_enabled: true 
From 700eb6837546b91254e6081848d5f857f678ccb8 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 10 Nov 2023 18:16:39 +0530 Subject: [PATCH 198/203] oci changes merge update --- .../roles/postgres-db-update/tasks/main.yml | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/ansible/roles/postgres-db-update/tasks/main.yml b/ansible/roles/postgres-db-update/tasks/main.yml index e458f09ed8..cb5bb632c9 100644 --- a/ansible/roles/postgres-db-update/tasks/main.yml +++ b/ansible/roles/postgres-db-update/tasks/main.yml @@ -7,48 +7,48 @@ - name: Ensure database is created postgresql_db: name="{{ postgres.db_name }}" \ - login_host="{{ postgres.db_url }}" \ - port="{{ postgres.db_port }}" \ - login_user="{{ postgres.db_admin_user }}" \ - login_password="{{ postgres.db_admin_password }}" \ - encoding='UTF-8' \ - state=present + login_host="{{ postgres.db_url }}" \ + port="{{ postgres.db_port }}" \ + login_user="{{ postgres.db_admin_user }}" \ + login_password="{{ postgres.db_admin_password }}" \ + encoding='UTF-8' \ + state=present tags: createdb - name: Ensure database for superset is created postgresql_db: name="superset" \ - login_host="{{ postgres.db_url }}" \ - port="{{ postgres.db_port }}" \ - login_user="{{ postgres.db_admin_user }}" \ - login_password="{{ postgres.db_admin_password }}" \ - encoding='UTF-8' \ - state=present + login_host="{{ postgres.db_url }}" \ + port="{{ postgres.db_port }}" \ + login_user="{{ postgres.db_admin_user }}" \ + login_password="{{ postgres.db_admin_password }}" \ + encoding='UTF-8' \ + state=present tags: createdb - name: Ensure user has access to the database postgresql_user: name="{{ postgres.db_username }}" \ - password="{{ postgres.db_password }}" \ - no_password_changes=true \ - priv=ALL \ - state=present \ - login_host="{{ postgres.db_url }}" \ - port="{{ postgres.db_port }}" \ - login_user="{{ postgres.db_admin_user }}" \ - login_password="{{ postgres.db_admin_password }}" \ - db="{{ postgres.db_name }}" + password="{{ postgres.db_password }}" \ + no_password_changes=true \ + priv=ALL \ + state=present \ + login_host="{{ postgres.db_url }}" \ + port="{{ postgres.db_port }}" \ + login_user="{{ postgres.db_admin_user }}" \ + login_password="{{ postgres.db_admin_password }}" \ + db="{{ postgres.db_name }}" tags: createuser - name: Ensure user has access to the superset database postgresql_user: name="{{ postgres.db_username }}" \ - password="{{ postgres.db_password }}" \ - no_password_changes=true \ - priv=ALL \ - state=present \ - login_host="{{ postgres.db_url }}" \ - port="{{ postgres.db_port }}" \ - login_user="{{ postgres.db_admin_user }}" \ - login_password="{{ postgres.db_admin_password }}" \ - db="superset" + password="{{ postgres.db_password }}" \ + no_password_changes=true \ + priv=ALL \ + state=present \ + login_host="{{ postgres.db_url }}" \ + port="{{ postgres.db_port }}" \ + login_user="{{ postgres.db_admin_user }}" \ + login_password="{{ postgres.db_admin_password }}" \ + db="superset" tags: createuser - name: create {{ env }}_consumer-channel-mapping @@ -74,7 +74,7 @@ - name: update {{ env }}_report_config shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_report_config ADD COLUMN IF NOT EXISTS batch_number INTEGER" run_once: true - tags: update + tags: update - name: create report shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" 
-d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "CREATE TABLE IF NOT EXISTS report(reportid varchar(40) NOT NULL PRIMARY KEY, title text NOT NULL, description text NOT NULL, authorizedroles jsonb NOT NULL, status varchar(8) NOT NULL CHECK (status IN ('live', 'draft', 'retired')) DEFAULT 'draft', type varchar(8) NOT NULL CHECK (type in ('public', 'private')) DEFAULT 'private', reportaccessurl text NOT NULL UNIQUE, createdon timestamptz NOT NULL DEFAULT now(), updatedon timestamptz NOT NULL DEFAULT now(), createdby varchar(50) NOT NULL, reportconfig jsonb NOT NULL, templateurl text, slug varchar(10) NOT NULL, reportgenerateddate timestamptz NOT NULL DEFAULT now(), reportduration jsonb NOT NULL DEFAULT jsonb_build_object('startDate', now()::timestamptz, 'endDate', now()::timestamptz), tags jsonb NOT NULL, updatefrequency text NOT NULL)" @@ -109,12 +109,12 @@ - name: update {{ env }}_job_request shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ADD COLUMN IF NOT EXISTS processed_batches text" run_once: true - tags: update + tags: update - name: create {{ env }}_experiment_definition shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "CREATE TABLE IF NOT EXISTS {{ env }}_experiment_definition (exp_id VARCHAR(50), created_by VARCHAR(50), created_on TIMESTAMP, criteria VARCHAR(100), exp_data VARCHAR(300), exp_description VARCHAR(200), exp_name VARCHAR(50), stats VARCHAR(300), status VARCHAR(50), status_message VARCHAR(50), updated_by VARCHAR(50), updated_on TIMESTAMP, PRIMARY KEY(exp_id))" run_once: true - tags: create + tags: create - name: create {{ env }}_dataset_metadata shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "CREATE TABLE IF NOT EXISTS {{ env }}_dataset_metadata(dataset_id VARCHAR(50), dataset_sub_id VARCHAR(150), dataset_config json, visibility VARCHAR(50), dataset_type VARCHAR(50), version VARCHAR(10), authorized_roles text[], available_from TIMESTAMP, sample_request text, sample_response text, validation_json json, druid_query json, limits json, supported_formats text[], exhaust_type VARCHAR(50), PRIMARY KEY (dataset_id, dataset_sub_id))" @@ -139,7 +139,7 @@ - name: update report shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE report ADD COLUMN IF NOT EXISTS report_type varchar(8) NOT NULL DEFAULT 'report'" run_once: true - tags: update + tags: update - name: update report shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE report ADD COLUMN IF NOT EXISTS visibilityflags jsonb" @@ -167,6 +167,6 @@ tags: update - name: update encryption_key in {{ env }}_job_request - shell: PGPASSWORD="{{postgres.db_password}}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ALTER encryption_key TYPE varchar(500)" + shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d 
"{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ALTER encryption_key TYPE varchar(500)" run_once: true tags: update From dccad63020b8434b517ac3aa1538ce6ee59d9df3 Mon Sep 17 00:00:00 2001 From: kumarks1122 Date: Wed, 17 May 2023 18:07:45 +0530 Subject: [PATCH 199/203] LR-539 | Encryption key length update added --- ansible/roles/postgres-db-update/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/postgres-db-update/tasks/main.yml b/ansible/roles/postgres-db-update/tasks/main.yml index cb5bb632c9..2a84b4f443 100644 --- a/ansible/roles/postgres-db-update/tasks/main.yml +++ b/ansible/roles/postgres-db-update/tasks/main.yml @@ -109,7 +109,7 @@ - name: update {{ env }}_job_request shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ADD COLUMN IF NOT EXISTS processed_batches text" run_once: true - tags: update + tags: update - name: create {{ env }}_experiment_definition shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "CREATE TABLE IF NOT EXISTS {{ env }}_experiment_definition (exp_id VARCHAR(50), created_by VARCHAR(50), created_on TIMESTAMP, criteria VARCHAR(100), exp_data VARCHAR(300), exp_description VARCHAR(200), exp_name VARCHAR(50), stats VARCHAR(300), status VARCHAR(50), status_message VARCHAR(50), updated_by VARCHAR(50), updated_on TIMESTAMP, PRIMARY KEY(exp_id))" From b152a298ff936b8077f132f644712d28cf7ee30b Mon Sep 17 00:00:00 2001 From: kumarks1122 Date: Thu, 14 Sep 2023 10:59:30 +0530 Subject: [PATCH 200/203] LR-539 | Encryption key length update added --- ansible/roles/postgres-db-update/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/postgres-db-update/tasks/main.yml b/ansible/roles/postgres-db-update/tasks/main.yml index 2a84b4f443..b76eda5641 100644 --- a/ansible/roles/postgres-db-update/tasks/main.yml +++ b/ansible/roles/postgres-db-update/tasks/main.yml @@ -167,6 +167,6 @@ tags: update - name: update encryption_key in {{ env }}_job_request - shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ALTER encryption_key TYPE varchar(500)" + shell: PGPASSWORD="{{postgres.db_password}}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ALTER encryption_key TYPE varchar(500)" run_once: true tags: update From 7680ec6e82c15b9509b72290f038771ce6073d7e Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 13 Nov 2023 00:49:08 +0530 Subject: [PATCH 201/203] oci changes merge update --- .../roles/analytics-druid/defaults/main.yml | 186 ++---------------- .../helm_charts/datapipeline_jobs/values.j2 | 23 +-- 2 files changed, 31 insertions(+), 178 deletions(-) diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index 4f4f016798..9c160b25b2 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -39,7 +39,7 @@ druid_community_extensions: # End of druid_extensions -druid_indexing_logs_type: "{{ 
cloud_storage_telemetry_type }}" +druid_indexing_logs_type: "{{ cloud_storage_telemetry_type }}" druid_indexing_log_dir: /var/druid/indexing-logs druid_indexing_storage_type: metadata druid_indexing_task_basedir: "/var/task" @@ -115,7 +115,6 @@ druid_router_gc_logfile: "{{ druid_crash_logdir }}/gc.router.log" druid_router_heap_dump_file: "{{ druid_crash_logdir }}/router.hprof" default_druid_configs: -<<<<<<< HEAD raw: #Druid Postgres Details druid_postgres_db: "druid" @@ -137,11 +136,18 @@ default_druid_configs: s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_endpoint: "{{ cloud_private_storage_endpoint }}" s3_segment_dir: "druid/raw/segments" - s3_path_like_access: "{{ cloud_storage_path_style_access }}" - s3_v4_sign_region: "{{ cloud_public_storage_region }}" + s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" + s3_v4_sign_region: "{{ cloud_private_storage_region }}" #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" + #Druid GCS Details + gcloud_bucket: "{{ cloud_storage_telemetry_bucketname }}" + gcloud_prefix: "" + gcloud_max_list_length: 1024 + #Logging the indexing logs to GCS + gcloud_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + gcloud_indexer_logs_dir: "druid/raw/stage/indexing_logs" #Druid coordinator node configuration druid_coordinator_heap_size: 128m druid_coordinator_period: PT30S @@ -211,11 +217,18 @@ default_druid_configs: s3_bucket: "{{ s3_storage_container }}" s3_endpoint: "{{ s3_storage_endpoint }}" s3_segment_dir: "druid/rollup/segments" - s3_path_like_access: "{{ cloud_storage_path_style_access }}" - s3_v4_sign_region: "{{ cloud_public_storage_region }}" + s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" + s3_v4_sign_region: "{{ cloud_private_storage_region }}" #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" + #Druid GCS Details + druid.google.bucket: "{{ cloud_storage_telemetry_bucketname }}" + druid.google.prefix: "" + druid.google.maxListingLength: 1024 + #Logging the indexing logs to gcs + gcloud_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + gcloud_indexer_logs_dir: "druid/rollup/stage/indexing_logs" #Druid coordinator node configuration druid_coordinator_heap_size: 128m druid_coordinator_period: PT30S @@ -261,166 +274,5 @@ default_druid_configs: druid_router_http_numMaxThreads: 100 druid_server_http_numThreads: 100 druid_router_managementProxy_enabled: true -======= - raw: - #Druid Postgres Details - druid_postgres_db: "druid" - druid_postgres_host: "{{ postgres.db_url }}" - druid_postgres_port: "{{ postgres.db_port }}" - druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" - #Druid Azure Details - druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ cloud_private_storage_accountname }}" - azure_storage_secret: "{{ cloud_private_storage_secret }}" - azure_container: "{{ cloud_storage_telemetry_bucketname }}" - #Logging the indexing logs to azure - druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" - druid_log_azure_folder: "druidlogs" - #Druid S3 Details - druid_storage_type: "{{ cloud_storage_telemetry_type }}" - s3_access_key: "{{ cloud_private_storage_accountname }}" - s3_secret_key: "{{ cloud_private_storage_secret }}" - s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" - s3_endpoint: "{{ 
cloud_private_storage_endpoint }}" - s3_segment_dir: "druid/raw/segments" - s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" - s3_v4_sign_region: "{{ cloud_private_storage_region }}" - #Logging the indexing logs to s3 - s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" - s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" - #Druid GCS Details - gcloud_bucket: "{{ cloud_storage_telemetry_bucketname }}" - gcloud_prefix: "" - gcloud_max_list_length: 1024 - #Logging the indexing logs to GCS - gcloud_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" - gcloud_indexer_logs_dir: "druid/raw/stage/indexing_logs" - #Druid coordinator node configuration - druid_coordinator_heap_size: 128m - druid_coordinator_period: PT30S - druid_coordinator_startDelay: PT30S - druid_coordinator_balance_strategy: diskNormalized - #Druid overlord node configuration - druid_overlord_heap_size: 256m - #Druid broker node configuration - druid_broker_min_heap_size: 128m - druid_broker_max_heap_size: 128m - druid_broker_max_direct_size: 800m - druid_broker_http_numConnections: 5 - druid_broker_server_http_numThread: 25 - druid_broker_processing_bufferBytes: 134217728 - druid_broker_processing_threads: 2 - #Druid historical node configuration - druid_historical_min_heap_size: 1048m - druid_historical_max_heap_size: 1048m - druid_historical_max_direct_size: 800m - druid_historical_http_numConnections: 5 - druid_historical_server_http_numThread: 25 - druid_historical_processing_bufferBytes: 134217728 - druid_historical_processing_threads: 2 - druid_historical_enable_cache: false - druid_historical_segmentcache_size: 10000000000 - druid_historical_server_maxsize: 10000000000 - druid_historical_processing_num_merge_buffers: 2 - druid_query_ondiskstorage_enabled: true - druid_historical_maxMergingDictionarySize: 100000000 - druid_historical_segmentcache_numloadingthreads: 4 - druid_historical_segmentcache_path: "/var/segmentstore" - druid.query.groupBy.maxOnDiskStorage: 10737418240 - #Druid middlemanager configuration - druid_middlemanager_heap_size: 128m - druid_middlemanager_worker_cap: 4 - druid_mm_java_opts_array: "-server -Xmx900m -XX:+UseG1GC -XX:HeapDumpPath={{ druid_mm_heap_dump_file }} -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" - druid_middlemanager_peon_server_http_numThread: 25 - druid_middlemanager_peon_processing_bufferBytes: 25000000 - druid_middlemanager_peon_processing_threads: 2 - druid_middlemanager_peon_server_maxsize: 0 - druid_indexing_queue_startDelay: PT30S - druid_router_heap_size: 1g - druid_router_http_numConnections: 50 - druid_router_http_readTimeout: PT5M - druid_router_http_numMaxThreads: 100 - druid_server_http_numThreads: 100 - druid_router_managementProxy_enabled: true - druid_historical_maxOnDiskStorage: 10737418240 - rollup: - #Druid Postgres Details - druid_postgres_db: "druid" - druid_postgres_host: "{{ postgres.db_url }}" - druid_postgres_port: "{{ postgres.db_port }}" - druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" - #Druid Azure Details - druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ cloud_private_storage_accountname }}" - azure_storage_secret: "{{ cloud_private_storage_secret }}" - azure_container: "{{ cloud_storage_telemetry_bucketname }}" - #Logging the indexing logs to azure - druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" - druid_log_azure_folder: "druidlogs" - #Druid S3 Details - druid_storage_type: "{{ 
cloud_storage_telemetry_type }}" - s3_access_key: "{{ s3_storage_key }}" - s3_secret_key: "{{ s3_storage_secret }}" - s3_bucket: "{{ s3_storage_container }}" - s3_endpoint: "{{ s3_storage_endpoint }}" - s3_segment_dir: "druid/rollup/segments" - s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" - s3_v4_sign_region: "{{ cloud_private_storage_region }}" - #Logging the indexing logs to s3 - s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" - s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" - #Druid GCS Details - druid.google.bucket: "{{ cloud_storage_telemetry_bucketname }}" - druid.google.prefix: "" - druid.google.maxListingLength: 1024 - #Logging the indexing logs to gcs - gcloud_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" - gcloud_indexer_logs_dir: "druid/rollup/stage/indexing_logs" - #Druid coordinator node configuration - druid_coordinator_heap_size: 128m - druid_coordinator_period: PT30S - druid_coordinator_startDelay: PT30S - druid_coordinator_balance_strategy: cost - #Druid overlord node configuration - druid_overlord_heap_size: 256m - #Druid broker node configuration - druid_broker_min_heap_size: 128m - druid_broker_max_heap_size: 128m - druid_broker_max_direct_size: 700m - druid_broker_http_numConnections: 5 - druid_broker_server_http_numThread: 25 - druid_broker_processing_bufferBytes: 134217728 - druid_broker_processing_threads: 2 - #Druid historical node configuration - druid_historical_min_heap_size: 1048m - druid_historical_max_heap_size: 1048m - druid_historical_max_direct_size: 800m - druid_historical_http_numConnections: 5 - druid_historical_server_http_numThread: 25 - druid_historical_processing_bufferBytes: 134217728 - druid_historical_processing_threads: 2 - druid_historical_enable_cache: false - druid_historical_segmentcache_size: 2000000000 - druid_historical_server_maxsize: 10000000000 - druid_historical_processing_num_merge_buffers: 2 - druid_query_ondiskstorage_enabled: false - druid_historical_segmentcache_numloadingthreads: 4 - druid_historical_segmentcache_path: "/var/segmentstore" - #Druid middlemanager configuration - druid_middlemanager_heap_size: 128m - druid_middlemanager_worker_cap: 4 - druid_mm_java_opts_array: "-server -Xmx900m -XX:+UseG1GC -XX:HeapDumpPath={{ druid_mm_heap_dump_file }} -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps" - druid_middlemanager_peon_server_http_numThread: 25 - druid_middlemanager_peon_processing_bufferBytes: 25000000 - druid_middlemanager_peon_processing_threads: 2 - druid_middlemanager_peon_server_maxsize: 0 - druid_indexing_queue_startDelay: PT30S - druid_router_heap_size: 1g - druid_router_http_numConnections: 50 - druid_router_http_readTimeout: PT5M - druid_router_http_numMaxThreads: 100 - druid_server_http_numThreads: 100 - druid_router_managementProxy_enabled: true ->>>>>>> 5faa25b0c (csp migration variables update) enable_druid_sql: true diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 9802f6fabf..4e2920caf7 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -3,13 +3,18 @@ imagepullsecrets: {{ imagepullsecrets }} dockerhub: {{ dockerhub }} repository: {{flink_repository|default('sunbird-datapipeline')}} image_tag: {{ image_tag }} -checkpoint_store_type: {{ cloud_service_provider }} -cloud_storage_key: {{ cloud_private_storage_accountname }} -cloud_storage_secret: {{ cloud_private_storage_secret }} 
-cloud_storage_endpoint: {{ cloud_private_storage_endpoint }}
-s3_path_style_access: {{ cloud_storage_pathstyle_access }}
-cloud_storage_project_id: {{ cloud_private_storage_project }}
-
+checkpoint_store_type: {{ checkpoint_store_type }}
+azure_account: {{ azure_account }}
+azure_secret: {{ azure_secret }}
+s3_access_key: {{ s3_storage_key }}
+s3_secret_key: {{ s3_storage_secret }}
+{% if cloud_service_provider == "oci" %}
+s3_endpoint: {{ oci_flink_s3_storage_endpoint }}
+{% else %}
+s3_endpoint: {{ s3_storage_endpoint }}
+{% endif %}
+s3_path_style_access: {{ s3_path_style_access }}
+cloud_storage_project_id: {{ cloud_public_storage_project }}
 serviceMonitor:
   enabled: {{ service_monitor_enabled | lower}}
 replicaCount: {{taskmanager_replicacount|default(1)}}
@@ -157,10 +162,6 @@ base_config: |
     {% if checkpoint_store_type == "azure" %}
     base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir}
     {% elif checkpoint_store_type == "s3" %}
-<<<<<<< HEAD
-    # base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir}
-    base.url = "s3://"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir}
-=======
     base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir}
     {% elif checkpoint_store_type == "gcloud" %}
    base.url = "gs://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir}

From e9335af93512fa5917208b62918a23807833d94d Mon Sep 17 00:00:00 2001
From: Sadanand <100120230+SadanandGowda@users.noreply.github.com>
Date: Tue, 14 Nov 2023 14:09:21 +0530
Subject: [PATCH 202/203] Update common.conf.j2

---
 ansible/roles/data-products-deploy/templates/common.conf.j2 | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2
index 439cdd9033..d29f854f8f 100644
--- a/ansible/roles/data-products-deploy/templates/common.conf.j2
+++ b/ansible/roles/data-products-deploy/templates/common.conf.j2
@@ -18,7 +18,6 @@ cloud_storage_telemetry_type="azure"
 cloud_storage_telemetry_type="s3"
 cloud_private_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}"
 cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint }}"
->>>>>>> 72bae4d12 (common variable changes in datapipeline/flink-jobs)
 aws_storage_key="{{ s3_storage_key }}"
 aws_storage_secret="{{ s3_storage_secret }}"
 {% endif %}

From 11d0d0b540d7e923e5437e64ee25d63c3ff1a924 Mon Sep 17 00:00:00 2001
From: SadanandGowda
Date: Fri, 17 Nov 2023 15:07:45 +0530
Subject: [PATCH 203/203] added cloud_storage_telemetry_type variable

---
 .../sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala
index 6599152b1a..3cea7b170b 100644
--- a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala
+++ b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala
@@ -179,9 +179,9 @@ class AssessmentAggregatorTaskTestSpec extends BaseTestSpec {
     when(mockKafkaUtil.kafkaStringSink(forceValidationAssessmentConfig.kafkaCertIssueTopic)).thenReturn(new certificateIssuedEventsSink)
     val task = new AssessmentAggregatorStreamTask(forceValidationAssessmentConfig, mockKafkaUtil)
     task.process()
-    BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.batchSuccessCount}").getValue() should be(1)
+    BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.batchSuccessCount}").getValue() should be(3)
     BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.cacheHitCount}").getValue() should be(5)
-    BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.apiHitSuccessCount}").getValue() should be(0)
+    BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.apiHitSuccessCount}").getValue() should be(2)
     BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.ignoredEventsCount}").getValue() should be(1)
   }
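
Note on the default_druid_configs hunks earlier in this series: the committed merge conflict is resolved in favour of the CSP-migration variables, and GCS settings are added next to the existing Azure and S3 ones. As a quick orientation, this is how the new GCS keys for the raw cluster group together -- a sketch only, not part of the patch; the key names come from the diff, the bucket value is a hypothetical placeholder:

    # default_druid_configs.raw, GCS keys added by the patch (illustrative values)
    gcloud_bucket: "telemetry-bucket"            # hypothetical bucket name
    gcloud_prefix: ""
    gcloud_max_list_length: 1024
    gcloud_logging_bucket: "telemetry-bucket"    # hypothetical bucket name
    gcloud_indexer_logs_dir: "druid/raw/stage/indexing_logs"

Worth noting that the raw block uses gcloud_* variable names while the rollup block uses the literal Druid property names (druid.google.bucket, druid.google.prefix, druid.google.maxListingLength); templates consuming these values will only resolve whichever spelling they actually reference.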
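Note on the values.j2 hunks: the checkpoint settings are decoupled from cloud_service_provider, and the S3 endpoint is selected per provider at render time. A minimal sketch of the inventory variables the template expects after this change -- the variable names are taken from the diff, but every value below is an assumption for illustration only:

    # illustrative group_vars sketch; all values are hypothetical
    cloud_service_provider: "oci"
    checkpoint_store_type: "s3"        # assumes OCI OSS is consumed through its S3-compatible API
    s3_storage_key: "{{ vault_s3_storage_key }}"        # hypothetical vault variable
    s3_storage_secret: "{{ vault_s3_storage_secret }}"  # hypothetical vault variable
    s3_path_style_access: "true"
    oci_flink_s3_storage_endpoint: "https://example-namespace.compat.objectstorage.example-region.oraclecloud.com"

With cloud_service_provider set to "oci", the {% if %} branch renders s3_endpoint from oci_flink_s3_storage_endpoint; every other provider falls through to s3_storage_endpoint. For checkpoint_store_type == "s3", the conflict resolution in base_config then yields a Flink checkpoint URL of the form s3://<account>/<container>/<checkpointing dir>.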