Skip to content

Commit

Permalink
[MINOR] add correct python dependencies
Browse files Browse the repository at this point in the history
This commit fixes the Python dependencies to support scuro.

Closes #2117
  • Loading branch information
christinadionysio authored and Baunsgaard committed Sep 24, 2024
1 parent 7d9230b commit 95c74be
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 17 deletions.
15 changes: 14 additions & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,20 @@ jobs:
pip install --upgrade pip
pip install --upgrade pip
pip install wheel
pip install numpy py4j scipy scikit-learn keras requests pandas unittest-parallel
pip install \
numpy \
py4j \
scipy \
scikit-learn \
requests \
pandas \
unittest-parallel \
torchvision \
transformers \
opencv-python \
torch \
librosa \
h5py
- name: Build Python Package
run: |
Expand Down
4 changes: 2 additions & 2 deletions src/main/python/systemds/scuro/representations/average.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import numpy as np

from systemds.scuro.modality.modality import Modality
from keras.api.preprocessing.sequence import pad_sequences
from systemds.scuro.representations.utils import pad_sequences

from systemds.scuro.representations.fusion import Fusion

Expand All @@ -41,7 +41,7 @@ def fuse(self, modalities: List[Modality]):

padded_modalities = []
for modality in modalities:
d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post')
d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')
padded_modalities.append(d)

data = padded_modalities[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import numpy as np

from systemds.scuro.modality.modality import Modality
from keras.api.preprocessing.sequence import pad_sequences
from systemds.scuro.representations.utils import pad_sequences

from systemds.scuro.representations.fusion import Fusion

Expand All @@ -51,7 +51,7 @@ def fuse(self, modalities: List[Modality]):

for modality in modalities:
if self.padding:
data = np.concatenate([data, pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post')], axis=-1)
data = np.concatenate([data, pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')], axis=-1)
else:
data = np.concatenate([data, modality.data], axis=-1)

Expand Down
4 changes: 2 additions & 2 deletions src/main/python/systemds/scuro/representations/max.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import numpy as np

from systemds.scuro.modality.modality import Modality
from keras.preprocessing.sequence import pad_sequences
from systemds.scuro.representations.utils import pad_sequences

from systemds.scuro.representations.fusion import Fusion

Expand All @@ -46,7 +46,7 @@ def fuse(self, modalities: List[Modality],):

padded_modalities = []
for modality in modalities:
d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32', padding='post')
d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')
padded_modalities.append(d)

split_rows = int(len(modalities[0].data) / self.split)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

import librosa
import numpy as np
from keras.src.utils import pad_sequences
from systemds.scuro.representations.utils import pad_sequences

from systemds.scuro.representations.unimodal import UnimodalRepresentation

Expand All @@ -51,7 +51,7 @@ def parse_all(self, file_path, indices, get_sequences=False):

r = []
for elem in result:
d = pad_sequences(elem, maxlen=max_length, dtype='float32', padding='post')
d = pad_sequences(elem, maxlen=max_length, dtype='float32')
r.append(d)

np_array_r = np.array(r) if not self.avg else np.mean(np.array(r), axis=1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import numpy as np

from systemds.scuro.modality.modality import Modality
from keras.preprocessing.sequence import pad_sequences
from systemds.scuro.representations.utils import pad_sequences

from systemds.scuro.representations.fusion import Fusion

Expand All @@ -39,10 +39,10 @@ def __init__(self):
def fuse(self, modalities: List[Modality], train_indices=None):
max_emb_size = self.get_max_embedding_size(modalities)

data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32', padding='post')
data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32')

for m in range(1, len(modalities)):
# scaled = self.scale_data(modalities[m].data, train_indices)
data = np.multiply(data, pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32', padding='post'))
data = np.multiply(data, pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32'))

return data
4 changes: 2 additions & 2 deletions src/main/python/systemds/scuro/representations/rowmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import numpy as np

from modality.modality import Modality
from keras.api.preprocessing.sequence import pad_sequences
from systemds.scuro.representations.utils import pad_sequences

from representations.fusion import Fusion

Expand All @@ -47,7 +47,7 @@ def fuse(self, modalities: List[Modality], train_indices):
padded_modalities = []
for modality in modalities:
scaled = self.scale_data(modality.data, train_indices)
d = pad_sequences(scaled, maxlen=max_emb_size, dtype='float32', padding='post')
d = pad_sequences(scaled, maxlen=max_emb_size, dtype='float32')
padded_modalities.append(d)

split_rows = int(len(modalities[0].data) / self.split)
Expand Down
6 changes: 3 additions & 3 deletions src/main/python/systemds/scuro/representations/sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@


from systemds.scuro.modality.modality import Modality
from keras.preprocessing.sequence import pad_sequences
from systemds.scuro.representations.utils import pad_sequences

from systemds.scuro.representations.fusion import Fusion

Expand All @@ -38,9 +38,9 @@ def __init__(self):
def fuse(self, modalities: List[Modality]):
max_emb_size = self.get_max_embedding_size(modalities)

data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32', padding='post')
data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32')

for m in range(1, len(modalities)):
data += pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32', padding='post')
data += pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32')

return data
13 changes: 13 additions & 0 deletions src/main/python/systemds/scuro/representations/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,16 @@ def __init__(self):
def parse_all(self, filepath, indices):
    """Read the file at ``filepath`` and return its parsed JSON content.

    ``indices`` is accepted as a parameter but is not used by this method.
    """
    with open(filepath) as json_file:
        parsed = json.load(json_file)
    return parsed


def pad_sequences(sequences, maxlen=None, dtype='float32', value=0):
    """Pad or truncate sequences to a common length and stack them in a 2-D array.

    Each sequence is written into one row of the result starting at column 0.
    Sequences longer than ``maxlen`` are cut off at the end; shorter ones are
    filled up at the end with ``value``.

    :param sequences: sized collection of sequences (each supporting ``len``
        and slicing)
    :param maxlen: number of columns of the result; when ``None`` the length
        of the longest sequence is used (0 if ``sequences`` is empty)
    :param dtype: dtype of the returned array
    :param value: fill value used for the padded positions
    :return: array of shape ``(len(sequences), maxlen)``
    """
    if maxlen is None:
        # default=0 keeps an empty collection from raising ValueError in max().
        maxlen = max((len(seq) for seq in sequences), default=0)

    result = np.full((len(sequences), maxlen), value, dtype=dtype)

    for i, seq in enumerate(sequences):
        # Slicing truncates over-long sequences; shorter ones keep the
        # pre-filled ``value`` in the remaining columns.
        data = seq[:maxlen]
        result[i, :len(data)] = data

    return result

0 comments on commit 95c74be

Please sign in to comment.