forked from spark-examples/pyspark-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pyspark-array-string.py
31 lines (25 loc) · 930 Bytes
/
pyspark-array-string.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# -*- coding: utf-8 -*-
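"""
Convert a PySpark array column to a delimited string using concat_ws,
first through the DataFrame API and then through Spark SQL.

Author: SparkByExamples.com
"""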
import pyspark
from pyspark.sql import SparkSession
# Obtain (or reuse) a Spark session bound to a local[1] master.
spark = (
    SparkSession.builder
    .master("local[1]")
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows: a name string, a list of languages, and a state code.
columns = ["name", "languagesAtSchool", "currentState"]
data = [
    ("James,,Smith", ["Java", "Scala", "C++"], "CA"),
    ("Michael,Rose,", ["Spark", "Java", "C++"], "NJ"),
    ("Robert,,Williams", ["CSharp", "VB"], "NV"),
]
df = spark.createDataFrame(data=data, schema=columns)

# Print the schema, then every row without truncating cell contents.
df.printSchema()
df.show(truncate=False)
from pyspark.sql.functions import col, concat_ws
# Overwrite "languagesAtSchool" with its elements joined by "," so the
# column holds a single string instead of a list of languages.
df2 = df.withColumn(
    "languagesAtSchool",
    concat_ws(",", col("languagesAtSchool")),
)

# Print the resulting schema and the untruncated rows.
df2.printSchema()
df2.show(truncate=False)
# Register the original DataFrame as a temp view so it can be queried by name.
df.createOrReplaceTempView("ARRAY_STRING")

# Same conversion expressed in SQL; the adjacent literals form one query string.
spark.sql(
    "select name, concat_ws(',',languagesAtSchool) as languagesAtSchool,"
    " currentState from ARRAY_STRING"
).show(truncate=False)