
Data path does not match the downloaded dataset #1

Closed
coasxu opened this issue Jul 21, 2023 · 2 comments

Comments

coasxu commented Jul 21, 2023

Thanks for your great work!

I downloaded the PAMAP2 dataset via the link (https://archive.ics.uci.edu/dataset/231/pamap2+physical+activity+monitoring), but I cannot find the pamap2_data_100.pkl file. Did you pre-process the dataset? Is there any code for that? Thanks!

data_path = os.path.join(DATA_DIR, 'PAMAP2/pamap2_data_100.pkl')

jiayunz (Owner) commented Jul 22, 2023

Hi, the data pre-processing mainly contains three steps: 1) perform z-score normalization; 2) apply a sliding window of length 100 to get non-overlapping time windows; 3) keep those windows where the label is the same for all timesteps. You can refer to the code below:

import pandas as pd
import numpy as np
import pickle

seq = 100  # sliding window length

def z_norm(x):
    # per-feature mean and standard deviation, ignoring NaNs
    mean = np.nanmean(x, axis=0)
    std = np.nanstd(x, axis=0)
    return mean, std

def generate_three_IMU(name):
    x = name + '_x'
    y = name + '_y'
    z = name + '_z'
    return [x, y, z]

def generate_four_IMU(name):
    x = name + '_x'
    y = name + '_y'
    z = name + '_z'
    w = name + '_w'
    return [x, y, z, w]

def generate_cols_IMU(name):
    # temp
    temp = name + '_temperature'
    output = [temp]
    # acceleration 16
    acceleration16 = name + '_3D_acceleration_16'
    acceleration16 = generate_three_IMU(acceleration16)
    output.extend(acceleration16)
    # acceleration 6
    acceleration6 = name + '_3D_acceleration_6'
    acceleration6 = generate_three_IMU(acceleration6)
    output.extend(acceleration6)
    # gyroscope
    gyroscope = name + '_3D_gyroscope'
    gyroscope = generate_three_IMU(gyroscope)
    output.extend(gyroscope)
    # magnetometer
    magnetometer = name + '_3D_magnetometer'
    magnetometer = generate_three_IMU(magnetometer)
    output.extend(magnetometer)
    # orientation
    orientation = name + '_4D_orientation'
    orientation = generate_four_IMU(orientation)
    output.extend(orientation)
    return output

def load_IMU():
    output = ['time_stamp', 'activity_id', 'heart_rate']
    hand = 'hand'
    hand = generate_cols_IMU(hand)
    output.extend(hand)
    chest = 'chest'
    chest = generate_cols_IMU(chest)
    output.extend(chest)
    ankle = 'ankle'
    ankle = generate_cols_IMU(ankle)
    output.extend(ankle)
    return output

def load_subjects():
    cols = load_IMU()
    frames = []
    for i in range(101, 110):
        path = f"subject{i}.dat"
        subject = pd.read_table(path, header=None, sep=r'\s+')
        subject.columns = cols
        subject['userId'] = i - 101
        frames.append(subject)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    output = pd.concat(frames, ignore_index=True)
    return output

data = load_subjects()
data = data.drop(data[data['activity_id'] == 0].index)  # drop the transient activity (id 0)
data = data.interpolate()
# fill any remaining NaN values in a column with the mean of that column
for colName in data.columns:
    data[colName] = data[colName].fillna(data[colName].mean())

sub_data, sub_label = [], []
for i in range(9):
    sub = data[data['userId'] == i]
    sub_data.append(sub.drop(['userId', 'activity_id', 'time_stamp'], axis=1).values)
    sub_label.append(sub['activity_id'].values)

# remap the non-contiguous activity ids (12, 13, 16, 17, 24) to contiguous ones
label_dict = {12: 8, 13: 9, 16: 10, 17: 11, 24: 12}
for i in range(len(sub_data)):
    mean, std = z_norm(sub_data[i])
    sub_data[i] = (sub_data[i] - mean) / std
    sub_data[i][np.isnan(sub_data[i])] = 0  # zero-fill NaNs produced by zero-std features

    sub_label[i] = [label_dict[sub_label[i][t]] - 1 if sub_label[i][t] >= 12 else sub_label[i][t] - 1 for t in range(len(sub_label[i]))]  # subtract 1 so labels start from 0

    round_len = len(sub_data[i]) // seq * seq  # truncate to a whole number of windows
    sub_data[i] = np.array([sub_data[i][x:x+seq] for x in range(0, round_len, seq)])
    sub_label[i] = np.array([sub_label[i][x:x+seq] for x in range(0, round_len, seq)])

    # keep only windows whose label is constant across all timesteps
    label_ind = np.array([np.all(sub_label[i][n] == sub_label[i][n][0]) for n in range(len(sub_label[i]))])
    sub_data[i] = sub_data[i][label_ind]
    sub_label[i] = sub_label[i][label_ind][:, 0]

with open(f"pamap2_data_{seq}.pkl", "wb") as output_file:
    pickle.dump(sub_data, output_file)
with open(f"pamap2_label_{seq}.pkl", "wb") as output_file:
    pickle.dump(sub_label, output_file)
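To sanity-check the windowing (step 2) and the pure-label filtering (step 3) without downloading the full dataset, here is a self-contained toy sketch on made-up data with a window length of 4 instead of 100 (all names and values here are illustrative, not from the repository):

```python
import numpy as np

seq = 4  # toy window length; the script above uses 100

rng = np.random.default_rng(0)
x = rng.normal(size=(10, 2))                  # 10 timesteps, 2 features
y = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2])  # label changes mid-stream

# step 1: z-score normalization per feature
x = (x - np.nanmean(x, axis=0)) / np.nanstd(x, axis=0)

# step 2: non-overlapping windows of length `seq`
round_len = len(x) // seq * seq
xw = np.array([x[i:i + seq] for i in range(0, round_len, seq)])
yw = np.array([y[i:i + seq] for i in range(0, round_len, seq)])

# step 3: keep only windows whose label is constant across timesteps
pure = np.array([np.all(w == w[0]) for w in yw])
xw, yw = xw[pure], yw[pure][:, 0]

print(xw.shape, yw)  # (1, 4, 2) [1]
```

Of the two length-4 windows, only the first has a single label throughout, so only it survives step 3; the second (labels 1, 1, 2, 2) is discarded.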

coasxu (Author) commented Jul 22, 2023

It's really helpful! Thank you!

coasxu closed this as completed Jul 22, 2023