-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmovies.py
31 lines (29 loc) · 983 Bytes
/
movies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
from pyspark.sql.functions import col
class MovieDict:
    """Lookup helper over a movie-titles CSV loaded into a Spark DataFrame.

    Attributes:
        movieTitles: DataFrame read from the CSV with the columns ``id``,
            ``year`` and ``name``.
        options: dict mapping a ``"<name> - <year>"`` label to the movie id.
    """

    def __init__(self, spark, filename='data/movie_titles.csv'):
        """Read the titles file and build the label -> id mapping.

        Args:
            spark: Object whose ``read.csv`` is used to load the file
                (presumably a ``SparkSession`` -- confirm against callers).
            filename: Path of the CSV to read. Defaults to the location
                that was previously hard-coded.
        """
        schema = StructType(
            [
                StructField("id", IntegerType(), False),
                StructField("year", IntegerType(), False),
                StructField("name", StringType(), False),
            ]
        )
        self.movieTitles = spark.read.csv(filename, schema=schema)

        # collect() materializes every row locally so the labels can be used
        # as plain dict keys. If two rows produce the same label, the later
        # row's id replaces the earlier one.
        self.options = {
            f"{x['name']} - {x['year']}": x['id']
            for x in self.movieTitles.collect()
        }

    def get_titles(self, movie_ids):
        """Return ``"<name> - <year>"`` labels for the given movie ids.

        Args:
            movie_ids: Iterable of movie ids to look up.

        Returns:
            A list with one label per matching row. The order is whatever the
            DataFrame query yields, not necessarily the order of
            ``movie_ids``; ids with no matching row contribute nothing.
        """
        # Materialize first so any iterable (tuple, generator, ...) reaches
        # isin() as a plain list of values.
        wanted = list(movie_ids)
        rows = (
            self.movieTitles
            .filter(col('id').isin(wanted))
            .select('name', 'year')
            .collect()
        )
        return [f"{r.name} - {r.year}" for r in rows]