Commit c2c92902 authored by Alex Fout

Merge remote-tracking branch 'origin/master'

parents de745f6d 1381a729
@@ -37,26 +37,28 @@ def extractWaves(session, n=4001, samplingRate=256, wave='all'):
     :return: 0 if success, 1 if it failed
     :rtype: int
     """
     # Create a dictionary of filter coefficients, the keys are waveforms
     b = {}
     if (wave == 'all'):
         waves = ['delta', 'theta', 'alpha', 'beta', 'gamma']
         for i in waves:
-            b[i] = FIR(n,samplingRate, wave)
+            b[i] = FIR(n,samplingRate, i)
     else:
         b[wave] = FIR(n,samplingRate, wave)
     if not hasattr(session, "waves"):
         # create a dictionary of pandas dataframes
         session.waves = {}
     chop = int((n-1)/2)
     columns = [col for col in session.raw.columns if col not in ignore_columns]
     for key in b:
+        df = pd.DataFrame()
         for col in columns:
             # apply filter, via convolution
-            df = pd.DataFrame()
             s = pd.Series(np.convolve(session.raw[col], b[key], mode='valid'))
-            df["_".join([col,wave])] = s
+            df['_'.join([col,key])] = s
         df['time'] = session.raw['time'][chop:-chop].reset_index(drop=True)
         session.waves[key] = df
     return 0
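This hunk fixes two bugs in extractWaves: the per-wave FIR coefficients were built from the literal wave argument instead of the loop variable i, and the output DataFrame was re-created inside the column loop, so only the last channel survived. The sketch below is an illustration, not the project's code: it shows why an n-tap filter applied with np.convolve(..., mode='valid') shortens each channel by n-1 samples, which is what trimming the time column by chop = (n-1)/2 on each side compensates for. scipy.signal.firwin stands in for the repo's FIR() helper, whose signature is assumed here.

# Minimal sketch (not the project's code): 'valid' convolution output is
# len(raw) - (n - 1) samples, so 'time' is trimmed by chop on each side.
# scipy.signal.firwin stands in for the repo's FIR() helper (an assumption).
import numpy as np
import pandas as pd
from scipy import signal

n, fs = 401, 256                      # filter length, sampling rate
t = np.arange(5 * fs) / fs            # 5 seconds of fake EEG
raw = pd.DataFrame({"time": t, "fp1": np.random.randn(t.size)})

# band-pass FIR taps for the alpha band (8-12 Hz), one entry per waveform
b = {"alpha": signal.firwin(n, [8, 12], fs=fs, pass_zero=False)}

chop = (n - 1) // 2
df = pd.DataFrame()                   # one DataFrame per waveform key
for col in ["fp1"]:
    filtered = np.convolve(raw[col], b["alpha"], mode="valid")
    df["_".join([col, "alpha"])] = filtered
df["time"] = raw["time"][chop:-chop].reset_index(drop=True)
print(len(raw), len(df))              # prints: 1280 880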
library(Hmisc)
#########################################################################
# PART 1
#########################################################################
file_names <- read.table("./data/files_raw1.txt", stringsAsFactors = F, header = F)
file_names2 <- unlist(file_names)
f_count <- length(file_names2)
col_names <- c("fp1","fp2","f3","f4","f7",
"f8","c3","c4","p3","p4",
"o1","o2","t3","t4","t5",
"t6","fz","cz","pz")
cr_all <- data.frame()
for (i in 1:f_count) {
  fn_raw <- file_names2[i]
  data_raw <- read.csv(paste0("./data/", fn_raw, ".raw"))
  colnames(data_raw) <- col_names
  # if an artifact file exists, blank out the flagged samples channel by channel
  fn_art <- paste0("./data/", fn_raw, ".art")
  if (file.exists(fn_art)) {
    data_art <- read.csv(fn_art)
    for (k in 1:19) {
      mask <- data_art[, k] == 1
      data_raw[mask, k] <- NA
    }
  }
  # pairwise channel correlations; rcorr uses pairwise-complete observations,
  # so the NA samples introduced above are tolerated
  c_res <- c()
  cr <- rcorr(as.matrix(data_raw))$r
  # cr <- cor(data_raw)
  # keep only the strictly upper triangle, one row of values per recording
  for (j in 1:18) {
    c_res <- c(c_res, cr[j, (j+1):19])
  }
  cr_all <- rbind(cr_all, c_res)
  row.names(cr_all)[i] <- fn_raw
  data_desc <- paste(i, fn_raw)
  print(data_desc)
}
cr_col_names <- c()
for (j in 1:18) {
  for (k in (j+1):19) {
    cr_col_names <- c(cr_col_names, paste0(col_names[j], col_names[k]))
  }
}
colnames(cr_all) <- cr_col_names
# Save the CSV
write.csv(cr_all, "cor_data_cleaned.csv")
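For each recording, Part 1 keeps only the strictly upper triangle of the 19x19 channel correlation matrix, giving choose(19, 2) = 171 values per file, and the nested loop above builds column names in the same row-major pair order. A small Python sketch (illustration only, not part of the repo) reproduces that flattening with numpy:

# Sketch (Python, illustration only): flatten the strictly upper triangle of a
# 19x19 correlation matrix into choose(19, 2) = 171 values, in the same
# row-major pair order used for cr_col_names.
import numpy as np

col_names = ["fp1", "fp2", "f3", "f4", "f7", "f8", "c3", "c4", "p3", "p4",
             "o1", "o2", "t3", "t4", "t5", "t6", "fz", "cz", "pz"]
cr = np.corrcoef(np.random.randn(19, 1000))   # stand-in correlation matrix
rows, cols = np.triu_indices(19, k=1)         # strictly-upper-triangle indices
flat = cr[rows, cols]                         # order: (0,1), (0,2), ..., (1,2), ...
pair_names = [col_names[r] + col_names[c] for r, c in zip(rows, cols)]
print(len(flat), pair_names[:3])              # 171 ['fp1fp2', 'fp1f3', 'fp1f4']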
#########################################################################
# PART 2
#########################################################################
# read CSV again
cr_all <- read.csv("cor_data_cleaned.csv")
cr_row_names <- cr_all[, 1]
cr_all <- cr_all[, 2:172]   # drop the file-name column; choose(19, 2) = 171 correlation columns remain
rownames(cr_all) <- cr_row_names
# Heatmap
heatmap(as.matrix(cr_all), Colv=NA)
# count NAs per column and impute the missing f7t3 correlations with 0
sapply(cr_all, function(x) sum(is.na(x)))
cr_all$f7t3[is.na(cr_all$f7t3)] <- 0
# PCA
pca <- prcomp(cr_all)
print(pca)
plot(pca, type = "l")
# Read the labels file
labels <- read.csv("labels.csv")
pca_data <- as.data.frame(pca$x)
pca_data$fn <- rownames(pca_data)
# left join
pca_data <- merge(x = pca_data, y = labels, by = "fn", all.x = TRUE)
# detect outliers via Mahalanobis distance on the first two principal components
library(anomalyDetection)
pca_data$md <- mahalanobis_distance(pca_data[, 2:3])
pca_data$anomaly <- pca_data$md > 5
# plot PC1 vs PC2, labelling the flagged recordings
library(ggplot2)
p <- ggplot(pca_data, aes(PC1, PC2, color = anomaly)) + geom_point() +
  geom_text(aes(label = ifelse(anomaly == TRUE, as.character(fn), ''), hjust = 0, vjust = 0))
p
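Part 2 flags recordings whose first two principal-component scores sit far from the rest, using a Mahalanobis distance cutoff of 5. The Python sketch below (illustration only) re-expresses the same idea as PCA followed by a Mahalanobis distance on PC1 and PC2; the exact scaling of the output of anomalyDetection::mahalanobis_distance is not verified here, so the cutoff is assumed to apply to a squared distance.

# Sketch (Python, illustration only): PCA then Mahalanobis distance on the
# first two component scores. The threshold of 5 mirrors the R script and is
# illustrative, not a verified match for the anomalyDetection package.
import numpy as np

def mahalanobis_sq(X):
    """Squared Mahalanobis distance of each row of X from the column means."""
    diff = X - X.mean(axis=0)
    cov_inv = np.linalg.inv(np.cov(X, rowvar=False))
    return np.einsum("ij,jk,ik->i", diff, cov_inv, diff)

cr_all = np.random.randn(40, 171)              # stand-in for the 171 correlations
centered = cr_all - cr_all.mean(axis=0)
U, S, Vt = np.linalg.svd(centered, full_matrices=False)   # PCA via SVD, like prcomp
scores = U * S                                 # principal component scores
md = mahalanobis_sq(scores[:, :2])             # PC1 and PC2 only
anomaly = md > 5
print(anomaly.sum(), "recordings flagged")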
import patient
import preprocessing
import string
import os
import pandas as pd
import numpy as np
import argparse
waves = ['delta', 'theta', 'alpha', 'beta', 'gamma']
def getLetter(i):
    return string.ascii_lowercase[i]

def printINFO(v, msg):
    if v:
        print("INFO: {}".format(msg))
def SaveWave2csv(pid, v=False, extension='raw', inOneCSV=False, nfilterCoeff=4001):
    printINFO(v, "Patient ID: {}".format(pid))
    p = patient.Patient(pid)
    printINFO(v, "Extracting waves for season_start!")
    preprocessing.extractWaves(p.season_start, n=nfilterCoeff, samplingRate=256, wave='all')
    printINFO(v, "Extracting waves for concussions!")
    for i in range(len(p.concussions)):
        preprocessing.extractWaves(p.concussions[i], n=nfilterCoeff, samplingRate=256, wave='all')
    printINFO(v, "Extracting waves for season_end!")
    preprocessing.extractWaves(p.season_end, n=nfilterCoeff, samplingRate=256, wave='all')
    printINFO(v, "Saving extracted waves to files!")
    if inOneCSV:
        # Save season_start to csv; 'path' is the module-level output folder set from -path
        fname = "".join([pid, 'a_waves.', extension])
        fpath = os.path.join(path, fname)
        tmp = list(p.season_start.waves.values())
        # drop the duplicated 'time' column from all but the last waveform dataframe
        for j in range(len(tmp) - 1):
            tmp[j].drop('time', axis=1, inplace=True)
        df = pd.concat(tmp, axis=1)
        printINFO(v, "Saving file: {}".format(fpath))
        df.to_csv(fpath, index=False)
        # Save concussions to csv
        for i in range(len(p.concussions)):
            fname = "".join([pid, getLetter(i + 1), '_waves.', extension])
            fpath = os.path.join(path, fname)
            tmp = list(p.concussions[i].waves.values())
            for j in range(len(tmp) - 1):
                tmp[j].drop('time', axis=1, inplace=True)
            df = pd.concat(tmp, axis=1)
            printINFO(v, "Saving file: {}".format(fpath))
            df.to_csv(fpath, index=False)
        # Save season_end to csv; its letter comes after all the concussion sessions
        fname = "".join([pid, getLetter(len(p.concussions) + 1), '_waves.', extension])
        fpath = os.path.join(path, fname)
        tmp = list(p.season_end.waves.values())
        for j in range(len(tmp) - 1):
            tmp[j].drop('time', axis=1, inplace=True)
        df = pd.concat(tmp, axis=1)
        printINFO(v, "Saving file: {}".format(fpath))
        df.to_csv(fpath, index=False)
    else:
        # Create one csv file per waveform
        for wave in waves:
            # save season_start to csv
            fname = "".join([pid, 'a_', wave, '.', extension])
            fpath = os.path.join(path, fname)
            printINFO(v, "Saving file: {}".format(fpath))
            p.season_start.waves[wave].to_csv(fpath, index=False)
            # save concussions to csv
            for i in range(len(p.concussions)):
                fname = "".join([pid, getLetter(i + 1), '_', wave, '.', extension])
                fpath = os.path.join(path, fname)
                printINFO(v, "Saving file: {}".format(fpath))
                p.concussions[i].waves[wave].to_csv(fpath, index=False)
            # save season_end to csv
            fname = "".join([pid, getLetter(len(p.concussions) + 1), '_', wave, '.', extension])
            fpath = os.path.join(path, fname)
            printINFO(v, "Saving file: {}".format(fpath))
            p.season_end.waves[wave].to_csv(fpath, index=False)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-path", required=True, type=str, help="the folder path to save the csv's")
    parser.add_argument("-v", required=False, action="store_true", help="verbose option to print INFO")
    parser.add_argument("-extension", required=False, type=str, help="the file extension to be used, default is raw")
    parser.add_argument("-pid", required=False, type=int, help="the pid to save; if not given, waves are saved for all patient ids")
    parser.add_argument("-nfilterCoeff", required=False, type=int, help="number of filter coefficients, default is 4001")
    parser.add_argument("-csvPerWave", required=False, action="store_true", help="save each waveform to a different csv file")
    args = parser.parse_args()
    path = args.path
    if args.nfilterCoeff:
        nfilterCoeff = args.nfilterCoeff
    else:
        nfilterCoeff = 4001
    if args.extension:
        extension = args.extension
    else:
        extension = 'raw'
    if args.pid:
        pid = [args.pid]
    else:
        pid = np.arange(1, 99)
    for i in pid:
        SaveWave2csv(str(i), v=args.v, inOneCSV=not args.csvPerWave, extension=extension, nfilterCoeff=nfilterCoeff)
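A hypothetical invocation (the script filename save_waves.py is assumed, not taken from the repo): write one CSV per waveform for patient 7, with verbose logging and a 2001-tap filter.

python save_waves.py -path ./data/waves -pid 7 -v -csvPerWave -nfilterCoeff 2001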