-
Notifications
You must be signed in to change notification settings - Fork 1
/
step_wise_queries.txt
69 lines (52 loc) · 4.41 KB
/
step_wise_queries.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
STEP 1 : /* UPLOADING .nt FILE ON HDFS AND CONVERTING IT INTO A .txt FILE WITH "^" AS DELIMITER (I have named it a .ct(caret) file :)) */
//Created a Java Map Reduce Program for conversion(To see the codes, Open JavaCodes->NTriplesToCaretSepereated)
hdfs dfs -mkdir /user/cloudera/Navigator
hdfs dfs -put /home/cloudera/Desktop/Navigator/geo_coordinates_en.nt /user/cloudera/Navigator
hadoop jar /home/cloudera/Desktop/Navigator/JavaCodes/NTriplesToCaretSeperated/Jar1.jar FileDriver /user/cloudera/Navigator/geo_coordinates_en.nt convertedfile
hdfs dfs -get /user/cloudera/Navigator/convertedfile/part-r-00000 /home/cloudera/Navigator -geo_coordinates.ct
STEP 2 : /* STORING geo_coordinates.ct IN A TABLE USING HIVE */
hive
hive> create database navigator ;
hive> use navigator ;
hive> create table rawcitydata(city string, latitude string, longitude string) row format delimited fields terminated by '^' ;
hive> load data local inpath '/home/cloudera/Desktop/Navigator/geo_coordinates.ct' into table rawcitydata ;
//Created a User Defined Function - "partition()" to find the value of partitioned index
//Created a User Defined Function - "distance()" to find the spherical distance between two GPS coordinates
//Created a User Defined Function - "direction()" to find the direction of a city with respect to user's city(To see he codes, Open NavigatorUDFs)
hive> add jar /home/cloudera/Desktop/Navigator/NavigatorUDFs/Jar2.jar ;
hive> create temporary function part as 'Partition' ;
hive> create temporary function distance as 'Distance' ;
hive> create temporary function direction as 'Direction' ;
hive> create table partitionedcitydata(city string, latitude string, longitude string) partitioned by(value string);
hive> set hive.exec.dynamic.partition = true; set hive.exec.dynamic.partition.mode = nonstrict;
hive> insert into table partitionedcitydata partition(value) select city, substr(latitude, 5, length(latitude) - 4), substr(longitude, 6, length(longitude) - 5), part(substr(longitude, 6, length(longitude) - 5), 0) from rawcitydata ;
STEP 3 : /* CREATING A TABLE USING HIVE TO STORE DATA OF 50 CITIES(This will be our seedfile) */
hive> create table userdata(city string, latitude string, longitude string) row format delimited fields terminated by ',' ;
hive> load data local inpath '/home/cloudera/Desktop/Navigator/user_data.csv' into table userdata ;
STEP 4 /* STORING THE FINAL OUTPUT AND CONVERTING IT INTO .tsv FORMAT(I have named it a .csv file) */
hive> create table navdata(usercity string, direction string, city string, latitude string, longitude string, distance string) row format delimited fields terminated by '\t' ;
hive> insert into table navdata
> select usercity, direction(lat2, long2, lat1, long1) as direction, city, lat1 as latitude, long1 as longitude, d as distance
> from(
> select usercity, city, lat1, long1, lat2, long2, d
> from(
> select usercity, city, lat1, long1, lat2, long2, distance(lat1, long1, lat2, long2) as d, value
> from(
> select a.city as usercity, part(a.longitude, 0) as p, part(a.longitude, -1) as pleft, part(a.longitude, 1) as pright, a.latitude as lat2, a.longitude as long2, b.city as city, b.latitude as lat1, b.longitude as long1, b.value as value
> from partitionedcitydata b, userdata a
> )f
> where value = p or value = pleft or value = pright
> )s
> where d <= 25
> )t ;
hive> insert overwrite local directory '/home/cloudera/Desktop/Navigator/CSVFinalOutput'
> row format delimited fields terminated by '\t'
> select usercity, direction, substr(city, 1, length(city) - 1), latitude, longitude, distance from navdata order by usercity, direction, distance ;
STEP 5 : /* CONVERTING .TSV FILE TO .JSON FILE FORMAT */
//Created a Java Map Reduce Program for conversion( To see the codes, Open JavaCodes->CSVToJSON)
hdfs dfs -put /home/cloudera/Desktop/Navigator/CSVFinalOutput/FinalOutput.csv /user/cloudera/Navigator
hadoop jar /home/cloudera/Desktop/Navigator/JavaCodes/CSVToJSON/Jar3.jar JSONDriver /user/cloudera/Navigator/FinalOutput.csv JSONFinalOutput
hdfs dfs -get /user/cloudera/Navigator/JSONFinalOutput/part-r-00000 /home/cloudera/Desktop/Navigator/JSONFinalOutput/FinalOutput.json
Open FinalOutput.json file.
Insert "{"(without double quote) at the beginning of the file.
Now go to the end of the file and remove the last comma(",") and now insert "}"(without double quote) at the end of the file.