-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathproblem61
25 lines (18 loc) · 1.07 KB
/
problem61
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
Problem Scenario 43 : You have been given the following code snippet.
val grouped = sc.parallelize(Seq(((1,"two"), List((3,4), (5,6)))))
val flattened = grouped.flatMap {A =>
groupValues.map { value => B }
}
You need to generate the following output.
Hence, replace A and B with the appropriate code.
Array((1,two,3,4),(1,two,5,6))
scala> val grouped = sc.parallelize(Seq(((1,"two"), List((3,4), (5,6)))))
grouped: org.apache.spark.rdd.RDD[((Int, String), List[(Int, Int)])] = ParallelCollectionRDD[0] at parallelize at <console>:27
scala> val flattened = grouped.flatMap{A => groupValues.map{value => B}}
<console>:29: error: not found: value groupValues
val flattened = grouped.flatMap{A => groupValues.map{value => B}}
^
scala> val flattened = grouped.flatMap{case(key, groupValues) => groupValues.map{value => (key._1, key._2, value._1, value._2)}}
flattened: org.apache.spark.rdd.RDD[(Int, String, Int, Int)] = MapPartitionsRDD[1] at flatMap at <console>:29
scala> flattened.collect
res0: Array[(Int, String, Int, Int)] = Array((1,two,3,4), (1,two,5,6))