-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassignment2.py
executable file
·155 lines (119 loc) · 7.58 KB
/
assignment2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env python3
import pandas
import numpy
# TODO: use me
def calculate_bins(data_set, variable, user_bins, user_labels, new_variable):
    """Bin a numeric column and store the categories as a new column.

    The values of ``data_set[variable]`` are cut into the intervals delimited
    by ``user_bins`` and the resulting categorical data is written to
    ``data_set[new_variable]``.

    Args:
        data_set: pandas DataFrame that holds ``variable``; modified in place.
        variable: name of the existing column to be binned.
        user_bins: bin edges, passed straight to ``pandas.cut``.
        user_labels: one label per bin (``len(user_bins) - 1`` labels).
        new_variable: name of the column that receives the categories.

    Returns:
        The same ``data_set`` object, for convenient chaining.
    """
    # Do NOT rebind ``data_set`` to the result of ``pandas.cut``: that would
    # replace the frame with a Series and the column assignment would fail.
    # With pandas.cut's defaults the intervals are right-closed, so a value
    # equal to the lowest edge is not assigned to any bin (it becomes NaN).
    data_set[new_variable] = pandas.cut(data_set[variable], user_bins, labels=user_labels)
    return data_set
def print_delimiter(ch='#', count=40):
    """Print a visual separator: a blank line, a ruler line, a blank line.

    The ruler consists of ``ch`` followed by a single space, repeated
    ``count`` times (so the line ends with a trailing space).
    """
    ruler = (ch + ' ') * count
    # One print call with a newline separator emits the same three lines
    # as printing '', the ruler and '' one after another.
    print('', ruler, '', sep='\n')
if __name__ == '__main__':
    # Script entry point: load the crater data, bin latitude, longitude and
    # diameter into fixed-width categories, and print the frequency
    # distribution of each one as counts and as percentages.
    print_delimiter()
    print('Study of the distribution of craters over the Martian surface - Assignment 2')
    print_delimiter()

    # Load the raw data from the CSV file.
    craters = pandas.read_csv('marscrater_pds.csv', low_memory=False)

    # Keep only the columns that are relevant for the study.
    craters = craters.filter(['CRATER_ID', 'CRATER_NAME', 'LATITUDE_CIRCLE_IMAGE', 'LONGITUDE_CIRCLE_IMAGE', 'DIAM_CIRCLE_IMAGE'])

    # Drop the craters with a diameter larger than 200km. They are a relatively
    # small group and are not taken into account in the study.
    # The explicit copy makes the filtered frame independent of the original,
    # so adding the category columns below does not raise SettingWithCopyWarning.
    craters = craters[craters['DIAM_CIRCLE_IMAGE'] <= 200].copy()

    # Print some general info about the crater data.
    print('The number of observations is {0} and the number of variables is {1}.'.format(len(craters), len(craters.columns)))
    print_delimiter()

    # NOTE(review): pandas.cut uses right-closed intervals by default, so a
    # value exactly equal to the lowest edge of a bin list below (-90, -180
    # or 1) is not assigned to any category. This matches the '<' in the
    # labels, but such rows are left out of the distributions - confirm that
    # this is intended.

    # ---- Latitude -------------------------------------------------------
    latitude_bins = [-90, -80, -70, -60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]
    # One label per 10-degree latitude bin.
    latitude_labels = [
        '-90 < latitude <= -80',
        '-80 < latitude <= -70',
        '-70 < latitude <= -60',
        '-60 < latitude <= -50',
        '-50 < latitude <= -40',
        '-40 < latitude <= -30',
        '-30 < latitude <= -20',
        '-20 < latitude <= -10',
        '-10 < latitude <= 0',
        ' 0 < latitude <= 10',
        ' 10 < latitude <= 20',
        ' 20 < latitude <= 30',
        ' 30 < latitude <= 40',
        ' 40 < latitude <= 50',
        ' 50 < latitude <= 60',
        ' 60 < latitude <= 70',
        ' 70 < latitude <= 80',
        ' 80 < latitude <= 90',
    ]
    # Compute the latitude categories once and store them in the data-set.
    craters['latitude_categories'] = pandas.cut(craters['LATITUDE_CIRCLE_IMAGE'], latitude_bins, labels=latitude_labels)

    # sort=False keeps the rows in bin order instead of ordering by frequency.
    print('Martian craters latitude frequency distribution [COUNTS]. The craters are organized into bins. Each bin includes the count of craters from 10° angle:')
    print(craters['latitude_categories'].value_counts(sort=False), end='\n\n')
    print('Martian craters latitude frequency distribution [PERCENTAGE]. The craters are organized into bins. Each bin includes craters percentage from 10° angle:')
    print(100 * (craters['latitude_categories'].value_counts(sort=False, normalize=True)))
    print_delimiter()

    # ---- Longitude ------------------------------------------------------
    longitude_bins = [-180, -160, -140, -120, -100, -80, -60, -40, -20, 0, 20, 40, 60, 80, 100, 120, 140, 160, 180]
    # One label per 20-degree longitude bin.
    longitude_labels = [
        '-180 < longitude <= -160',
        '-160 < longitude <= -140',
        '-140 < longitude <= -120',
        '-120 < longitude <= -100',
        '-100 < longitude <= -80',
        ' -80 < longitude <= -60',
        ' -60 < longitude <= -40',
        ' -40 < longitude <= -20',
        ' -20 < longitude <= 0',
        ' 0 < longitude <= 20',
        ' 20 < longitude <= 40',
        ' 40 < longitude <= 60',
        ' 60 < longitude <= 80',
        ' 80 < longitude <= 100',
        ' 100 < longitude <= 120',
        ' 120 < longitude <= 140',
        ' 140 < longitude <= 160',
        ' 160 < longitude <= 180',
    ]
    # Compute the longitude categories once and store them in the data-set.
    craters['longitude_categories'] = pandas.cut(craters['LONGITUDE_CIRCLE_IMAGE'], longitude_bins, labels=longitude_labels)

    print('Martian craters longitude frequency distribution [COUNTS]. The craters are organized into bins. Each bin includes the count of craters from 20° angle:')
    print(craters['longitude_categories'].value_counts(sort=False), end='\n\n')
    print('Martian craters longitude frequency distribution [PERCENTAGE]. The craters are organized into bins. Each bin includes craters percentage from 20° angle:')
    print(100 * (craters['longitude_categories'].value_counts(sort=False, normalize=True)))
    print_delimiter()

    # ---- Diameter -------------------------------------------------------
    diameter_bins = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200]
    # One label per diameter bin (the first bin spans 1-10, the rest are 10 wide).
    diameter_labels = [
        ' 1 < diameter <= 10',
        ' 10 < diameter <= 20',
        ' 20 < diameter <= 30',
        ' 30 < diameter <= 40',
        ' 40 < diameter <= 50',
        ' 50 < diameter <= 60',
        ' 60 < diameter <= 70',
        ' 70 < diameter <= 80',
        ' 80 < diameter <= 90',
        ' 90 < diameter <= 100',
        '100 < diameter <= 110',
        '110 < diameter <= 120',
        '120 < diameter <= 130',
        '130 < diameter <= 140',
        '140 < diameter <= 150',
        '150 < diameter <= 160',
        '160 < diameter <= 170',
        '170 < diameter <= 180',
        '180 < diameter <= 190',
        '190 < diameter <= 200',
    ]
    # Compute the diameter categories once and store them in the data-set.
    craters['diameter_categories'] = pandas.cut(craters['DIAM_CIRCLE_IMAGE'], diameter_bins, labels=diameter_labels)

    print('Martian craters diameter frequency distribution [COUNTS]. The craters are organized into bins:')
    print(craters['diameter_categories'].value_counts(sort=False), end='\n\n')
    print('Martian craters diameter frequency distribution [PERCENTAGE]. The craters are organized into bins:')
    print(100 * (craters['diameter_categories'].value_counts(sort=False, normalize=True)))
    print_delimiter()