forked from Chamberpain/ARGONE
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTransMat.py
175 lines (148 loc) · 6.08 KB
/
TransMat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from __init__ import ROOT_DIR
import sys
sys.path.append(ROOT_DIR)
from list import flat_list,LonList,LatList,GeoList
from TransGeo import TransitionGeo
import numpy as np
import scipy.sparse
import json
import scipy.sparse.linalg
import os
from scipy.sparse import _sparsetools
from scipy.sparse.sputils import (get_index_dtype,upcast)
import pickle
import geopy
class BaseMat(scipy.sparse.csc_matrix):
"""Base class for transition and correlation matrices"""
def __init__(self, *args,trans_geo=None,**kwargs):
super().__init__(*args,**kwargs)
if trans_geo:
self.set_trans_geo(trans_geo)
def set_trans_geo(self,trans_geo):
self.trans_geo = trans_geo
@classmethod
def load(cls,filename,GeoClass=TransitionGeo):
row_idx,column_idx,data,tuple_list = np.load(filename,allow_pickle=True)
shape_num = len(tuple_list)
trans_geo = GeoClass()
trans_geo.set_total_list(tuple_list)
out_data = cls((data.tolist(),(row_idx.tolist(),column_idx.tolist())),shape=(shape_num,shape_num),trans_geo = trans_geo)
return out_data
def new_sparse_matrix(self,data):
row_idx,column_idx,dummy = scipy.sparse.find(self)
return BaseMat((data,(row_idx,column_idx)),shape=(len(self.trans_geo.total_list),len(self.trans_geo.total_list)))
def mean(self,axis=0):
return np.array(self.sum(axis=axis)/(self!=0).sum(axis=axis)).flatten()
def _binopt(self, other, op):
""" This is included so that when sparse matrices are added together, their instance variables are maintained this code was grabbed from the scipy source with the small addition at the end"""
other = self.__class__(other)
# e.g. csr_plus_csr, csr_minus_csr, etc.
fn = getattr(_sparsetools, self.format + op + self.format)
maxnnz = self.nnz + other.nnz
idx_dtype = get_index_dtype((self.indptr, self.indices,
other.indptr, other.indices),
maxval=maxnnz)
indptr = np.empty(self.indptr.shape, dtype=idx_dtype)
indices = np.empty(maxnnz, dtype=idx_dtype)
bool_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
if op in bool_ops:
data = np.empty(maxnnz, dtype=np.bool_)
else:
data = np.empty(maxnnz, dtype=upcast(self.dtype, other.dtype))
fn(self.shape[0], self.shape[1],
np.asarray(self.indptr, dtype=idx_dtype),
np.asarray(self.indices, dtype=idx_dtype),
self.data,
np.asarray(other.indptr, dtype=idx_dtype),
np.asarray(other.indices, dtype=idx_dtype),
other.data,
indptr, indices, data)
if issubclass(type(self),TransMat):
try:
A = self.__class__((data, indices, indptr), shape=self.shape,trans_geo=self.trans_geo
,number_data=self.number_data)
except AttributeError:
A = self.__class__((data, indices, indptr), shape=self.shape,trans_geo=self.trans_geo)
else:
try:
A = self.__class__((data, indices, indptr), shape=self.shape,trans_geo=self.trans_geo
,number_data=self.number_data)
except AttributeError:
A = self.__class__((data, indices, indptr), shape=self.shape,trans_geo=self.trans_geo)
A.prune()
return A
def _mul_sparse_matrix(self, other):
""" This is included so that when sparse matrices are multiplies together,
their instance variables are maintained this code was grabbed from the scipy
source with the small addition at the end"""
M, K1 = self.shape
K2, N = other.shape
major_axis = self._swap((M, N))[0]
other = self.__class__(other) # convert to this format
idx_dtype = get_index_dtype((self.indptr, self.indices,
other.indptr, other.indices))
fn = getattr(_sparsetools, self.format + '_matmat_maxnnz')
nnz = fn(M, N,
np.asarray(self.indptr, dtype=idx_dtype),
np.asarray(self.indices, dtype=idx_dtype),
np.asarray(other.indptr, dtype=idx_dtype),
np.asarray(other.indices, dtype=idx_dtype))
idx_dtype = get_index_dtype((self.indptr, self.indices,
other.indptr, other.indices),
maxval=nnz)
indptr = np.empty(major_axis + 1, dtype=idx_dtype)
indices = np.empty(nnz, dtype=idx_dtype)
data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
fn = getattr(_sparsetools, self.format + '_matmat')
fn(M, N, np.asarray(self.indptr, dtype=idx_dtype),
np.asarray(self.indices, dtype=idx_dtype),
self.data,
np.asarray(other.indptr, dtype=idx_dtype),
np.asarray(other.indices, dtype=idx_dtype),
other.data,
indptr, indices, data)
if issubclass(type(self),TransMat):
try:
return self.__class__((data, indices, indptr), shape=self.shape,trans_geo=self.trans_geo
,number_data=self.number_data)
except AttributeError:
return self.__class__((data, indices, indptr), shape=self.shape,trans_geo=self.trans_geo)
else:
try:
return self.__class__((data, indices, indptr), shape=self.shape,trans_geo=self.trans_geo
,number_data=self.number_data)
except AttributeError:
return self.__class__((data, indices, indptr), shape=self.shape,trans_geo=self.trans_geo)
class TransMat(BaseMat):
def __init__(self, *args,rescale=False,**kwargs):
super().__init__(*args,**kwargs)
if rescale:
self.rescale()
def remove_small_values(self,value):
row_idx,column_idx,data = scipy.sparse.find(self)
mask = data>value
row_idx = row_idx[mask]
column_idx = column_idx[mask]
data = data[mask]
return self.__class__((data,(row_idx,column_idx)),shape=self.shape,trans_geo=self.trans_geo
,rescale=True)
def multiply(self,mult,value=0.02):
mat1 = self.remove_small_values(self.data.mean()/25)
mat2 = self.remove_small_values(self.data.mean()/25)
for k in range(mult):
print('I am at ',k,' step in the multiplication')
mat_holder = mat1.dot(mat2)
mat1 = mat_holder.remove_small_values(mat_holder.data.mean()/25)
return mat1
def rescale(self,checksum=10**-2):
div_array = np.abs(self.sum(axis=0)).tolist()[0]
row_idx,column_idx,data = scipy.sparse.find(self)
col_count = []
for col in column_idx:
col_count.append(float(div_array[col]))
self.data = np.array(data)/np.array(col_count)
zero_idx = np.where(np.abs(self.sum(axis=0))==0)[1]
self[zero_idx,zero_idx]=1
self.matrix_column_check(checksum=checksum)
def matrix_column_check(self,checksum):
assert (np.abs(self.sum(axis=0)-1)<checksum).all()