...
 
Commits (3)
import numpy as np
#### data ####
# directory that holds the data files; point this at your local copy
data_directory = "."

# names of the 19 EEG channels
column_names = [
    "fp1", "fp2", "f3", "f4", "f7", "f8",
    "c3", "c4", "p3", "p4", "o1", "o2",
    "t3", "t4", "t5", "t6",
    "fz", "cz", "pz",
]

# patient-id lists for the different populations
# (np.arange excludes its stop value, so arange(a, b) covers ids a .. b-1)
pid_testlist = np.arange(1, 4)
# patients 1-33 had no concussions
pid_noConcussion = np.arange(1, 34)
# patients 34-44 used the 3 step protocol
pid_3stepProtocol = np.arange(34, 45)
# patients 45-54 were freshmen, 1/2 male, 1/2 female, retested within a few days
pid_testRetest = np.arange(45, 55)
# patients 55-98 were assessed for concussion
pid_concussion = np.arange(55, 99)

#### experiment settings ####
# number of samples per epoch when generating examples for each feature;
# 256 samples equate to 1 second
epoch_size = 512

# features to use, one (name, args) tuple per feature:
#   name -- function name in feature_extractors.py
#   args -- positional arguments passed to that function
feature_functions = [("correlation", [])]

# settings for the embedding step
# NOTE(review): Embedding.__init__ takes `type`, not `mode` -- confirm how
# this dict is passed before relying on the "mode" entry.
embedding_args = dict(
    mode="pca",
    n_components=2,
)
import os
import numpy as np
import pandas as pd
import scipy as sp
from feature_extractors import extractors
from preprocessing import extractWaves
from matplotlib import pyplot as plt
from config import data_directory
plot_ignore_columns = ["window", "time"]
from feature_extractors import extractors
from preprocessing import extractWaves
from config import data_directory, column_names
class EEGSession():
......@@ -18,6 +13,7 @@ class EEGSession():
self.id = str(id)
self.raw = raw
self.artifacts = artifacts
self.window = None
self.window_size = None
self.n_windows = None
......@@ -25,20 +21,19 @@ class EEGSession():
"""
for each channel, replaces artifact frames from the raw data frame. artifacts are indicated by 1's in self.artifacts for the same frame/channel
"""
cols = [col for col in self.raw.columns if col not in plot_ignore_columns]
# replace the samples of each column wherever the artifacts matrix is 1 (with zeros or random normal values, depending on mode):
for i, col in enumerate(cols):
for i in range(self.raw.shape[1]):
# make sure the artifacts file is the same length as the raw file. this is not true for some datasets
if len(self.artifacts.as_matrix()[:, i]) == len(self.raw[col].as_matrix()):
if len(self.artifacts[:, i]) == len(self.raw[:, i]):
if mode == "zero":
replacements = np.zeros_like(self.raw[col].as_matrix())
replacements = np.zeros_like(self.raw[:, i])
elif mode == "normal":
# mean of everything that is not anomalous
chan_mean = np.mean(self.raw[col].as_matrix()[[np.where(self.raw[col].as_matrix())]])
chan_std = np.std(self.raw[col].as_matrix()[[np.where(self.raw[col].as_matrix())]])
replacements = np.random.normal(chan_mean, chan_std, size = self.raw[col].as_matrix().shape)
self.raw[col] = pd.Series(np.where(self.artifacts[col].as_matrix() == 1, replacements, self.raw[col].as_matrix()), dtype=np.float64)
if np.any(pd.isnull(self.raw[col])):
chan_mean = np.mean(self.raw[:, i][np.where(self.raw[:, i])])
chan_std = np.std(self.raw[:, i][np.where(self.raw[:, i])])
replacements = np.random.normal(chan_mean, chan_std, size=self.raw[:, i].shape)
self.raw[:, i] = np.where(self.artifacts[:, i] == 1, replacements, self.raw[:, i])
if np.any(np.isnan(self.raw[:, i])):
print("{}: NaNs exist after artifact removal, setting raw to 'None'".format(self.id))
self.raw = None
return
......@@ -57,9 +52,8 @@ class EEGSession():
for i in range(int(len(self.raw)/self.window_size + 1)):
window.extend([i] * self.window_size)
window = np.array(window)
self.raw['window'] = pd.Series(window[:len(self.raw)], index=self.raw.index)
self.raw.set_index('window')
self.n_windows = np.unique(self.raw['window'])
self.window = window[:len(self.raw)]
self.n_windows = np.unique(self.window)
def plot_channels(self, channels="all", end=-1):
"""
......@@ -70,14 +64,14 @@ class EEGSession():
:type end: int
"""
if end == -1:
end = len(self.raw)
end = self.raw.shape[0]
if channels == "all":
channels == [col for col in self.raw.columns if col not in plot_ignore_columns]
channels = np.arange(self.raw.shape[1])
frames = range(end)
f, axes = plt.subplots(len(channels))
for i, axis in enumerate(axes):
colname = channels[i]
axis.plot(frames/256., self.raw[colname][:end], 'k')
colname = column_names[i]
axis.plot(frames/256., self.raw[:end, i], 'k')
#axis.set_title(colname)
axis.text(.5, .5, colname, horizontalalignment='center',
transform=axis.transAxes, bbox=dict(facecolor='white', alpha=0.5) )
......@@ -99,19 +93,21 @@ class EEGSession():
if end == -1:
end = self.window_size
if windows == "all":
windows = np.unique(self.raw["window"])
windows = np.unique(self.window)
frames = range(end)
alpha = 0.5 / np.log(len(windows)) if len(windows) > 1 else 1
if channels == "all":
channels = [col for col in self.raw.columns if col not in plot_ignore_columns]
channels = np.arange(self.raw.shape[1])
n_channels = len(channels)
f, axes = plt.subplots(n_channels)
for i, axis in enumerate(axes):
colname = channels[i]
channel = self.raw[colname]
colname = column_names[i]
channel = self.raw[:, i]
for window in windows:
w = channel.loc[self.raw["window"] == window]
axis.plot(frames[:len(w)]/256., w, alpha=alpha, color='k')
axis.text(.5, .5, colname, horizontalalignment='center',
transform=axis.transAxes, bbox=dict(facecolor='white', alpha=0.5) )
plt.show()
def plot_dataframe(self, df_name, channels=""):
......@@ -119,14 +115,14 @@ class EEGSession():
def get_examples(self, feature_args, epoch_size="all", channels="all", filtered_waves="true"):
if channels == "all":
channels = [col for col in self.raw.columns if col not in plot_ignore_columns]
channels = np.arange(self.raw.shape[1])
if filtered_waves:
extractWaves(self)
if epoch_size == "all":
epoch_size = self.waves.values()[0].shape[0]
n_epochs = int(self.waves.values()[0].shape[0] / epoch_size)
examples = []
wave_matrices = {k: v.as_matrix() for k, v in self.waves.items()}
wave_matrices = {k: v[:, channels] for k, v in self.waves.items()}
for i in range(n_epochs):
feature_list = []
for wave_name, wave_matrix in wave_matrices.items():
......
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
class Embedding(object):
    """
    Small wrapper that fits a PCA or t-SNE embedding and applies it to data.

    :param type: which embedding to use, "pca" or "tsne"
    :type type: str
    :param kwargs: optional settings; only "n_components" is read, any other
        key is ignored
    """

    def __init__(self, type="pca", **kwargs):
        self.type = type
        # Always define the attribute: a missing "n_components" must neither
        # raise KeyError here nor leave train() without the attribute.
        # None means "let the estimator use its own default".
        self.n_components = kwargs.get("n_components")

    def _estimator_kwargs(self):
        """Keyword arguments forwarded to the underlying estimator."""
        if self.n_components is None:
            return {}
        return {"n_components": self.n_components}

    def train(self, train_data):
        """
        Fit the configured embedding on train_data and keep the fitted
        estimator on the instance (self.pca or self.tsne).

        Does nothing when self.type is neither "pca" nor "tsne".

        :param train_data: data passed to the estimator's fit()
        """
        if self.type == "pca":
            pca = PCA(**self._estimator_kwargs())
            pca.fit(train_data)
            self.pca = pca
        elif self.type == "tsne":
            tsne = TSNE(**self._estimator_kwargs())
            tsne.fit(train_data)
            self.tsne = tsne

    def embed(self, train_data):
        """
        Project train_data with the embedding prepared by train().

        :param train_data: data to embed
        :return: the embedded data, or None for an unsupported type
        """
        if self.type == "pca":
            return self.pca.transform(train_data)
        if self.type == "tsne":
            # Note: TSNE cannot transform after being fit, it can only
            # fit_transform, so the embedding is recomputed on the given data
            # instead of silently returning None.
            return self.tsne.fit_transform(train_data)
        return None
import os
import sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from config import data_directory
from eeg_session import EEGSession
......@@ -73,13 +71,12 @@ class Patient(object):
:rtype: EEGSession
"""
try:
raw = pd.read_csv(os.path.join(data_directory, filename + ".raw"), names=self.columns, dtype=np.float64)
raw["time"] = pd.Series([i/256. for i in range(len(raw.index))])
raw = np.genfromtxt(os.path.join(data_directory, filename + ".raw"), delimiter=',', dtype=np.float64)
except:
print("Can't load data file: {}".format(filename + ".raw"))
return None
try:
artifacts = pd.read_csv(os.path.join(data_directory, filename + ".art"), names=self.columns, dtype=np.float64)
artifacts = np.genfromtxt(os.path.join(data_directory, filename + ".art"), delimiter=',', dtype=np.float64)
except:
print("Can't load data file: {}".format(filename + ".art"))
return None
......@@ -108,8 +105,8 @@ def main():
#patient.season_start.plot_windows(windows=np.arange(10), channels=["c3", "cz", "c4", "p3", "pz", "p4"])
prep.extractWaves(patient.pre_test, n=4001, samplingRate=256, wave='alpha')
patient.pre_test.extract_windows()
patient.pre_test.plot_windows(windows=np.arange(10), channels=["c3", "cz", "c4", "p3", "pz", "p4"])
#patient.season_start.plot_channels(channels=["c3", "cz", "c4", "p3", "pz", "p4"], end=256)
patient.pre_test.plot_windows(windows=np.arange(10), channels=[1, 2, 3, 4, 5])
#patient.season_start.plot_channels(channels=[1, 2, 3, 4, 5], end=256)
import pdb; pdb.set_trace()
if __name__ == "__main__":
......
import pandas as pd
from scipy import signal
import numpy as np
ignore_columns = ["time", "window"]
def stft(session, **kwargs):
"""
......@@ -14,9 +12,9 @@ def stft(session, **kwargs):
"""
if not hasattr(session, "stft"):
session.stft = {}
columns = [col for col in session.raw.columns if col not in ignore_columns]
for col in columns:
session.stft[col] = signal.stft(session.raw[col])
for i in range(session.raw.shape[1]):
session.stft[i] = signal.stft(session.raw[i], **kwargs)
def extractWaves(session, n=4001, samplingRate=256, wave='all'):
"""
......@@ -30,10 +28,13 @@ def extractWaves(session, n=4001, samplingRate=256, wave='all'):
:param session: session of eeg data with a raw instance variable containing a pandas data frame of channels
n : The number of filter coefficients used to construct the filter (Higher number gives a more accurate filter)
samplingRate : The sampling rate of the EEG Data, to which the filter will be applied
wave : The waveform, defines the bands of the filter
:type session: EEGSession
:param n: The number of filter coefficients used to construct the filter (Higher number gives a more accurate filter)
:type n: int
:param samplingRate: The sampling rate of the EEG Data, to which the filter will be applied
:type samplingRate: int
:param wave: The waveform, defines the bands of the filter
:type wave: str
:return: 0 if success, 1 if it failed
:rtype: int
"""
......@@ -43,25 +44,24 @@ def extractWaves(session, n=4001, samplingRate=256, wave='all'):
if (wave == 'all'):
waves = ['delta', 'theta', 'alpha', 'beta', 'gamma']
for i in waves:
b[i] = FIR(n,samplingRate, i)
b[i] = FIR(n, samplingRate, i)
else:
b[wave] = FIR(n,samplingRate, wave)
b[wave] = FIR(n, samplingRate, wave)
if not hasattr(session, "waves"):
# create a dictionary of pandas dataframes
session.waves = {}
chop = int((n-1)/2)
columns = [col for col in session.raw.columns if col not in ignore_columns]
columns = np.arange(session.raw.shape[1])
out_data = []
for key in b:
df = pd.DataFrame()
for col in columns:
for i in columns:
# apply filter, via convolution
s = pd.Series(np.convolve(session.raw[col], b[key], mode='valid'))
df['_'.join([col,key])] = s
df['time'] = session.raw['time'][chop:-chop].reset_index(drop=True)
session.waves[key] = df
s = np.convolve(session.raw[:, i], b[key], mode='valid')
out_data.append(np.expand_dims(s, -1))
session.waves[key] = np.hstack(out_data)
return 0
def FIR(n=4001, samplingRate=256, wave='alpha'):
"""
......