# Getting & Cleaning Data for Coursera "getdata-015" course
# Environment setup and download of the week-3 quiz CSV.
#
# NOTE(review): the working directory is hard-coded to one machine; every
# relative path used by this script ("./data", "./test", "./train", ...)
# resolves against it. Adjust this path before running elsewhere.
setwd("~/Documents/WGSN/Personal Development/Coursera - data scientists toolbox/Getting & Cleaning Data/Assignment/UCI HAR dataset")

# Make sure the local download directory exists.
if (!file.exists("./data")) {
  dir.create("./data")
}

# Fetch the CSV and load it; strings are kept as character, not factor.
fileURL1 <- "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Fss06hid.csv"
download.file(fileURL1, destfile = "./data/GCD_wk3_q1.csv", method = "curl")
# `FALSE` (not `False`): R logical constants are upper-case, and `False`
# would fail with "object 'False' not found".
data_q1 <- read.csv("./data/GCD_wk3_q1.csv", stringsAsFactors = FALSE)
library(tidyr)
library(dplyr)
# Locations of the input files, relative to the working directory.

# Test partition: subject ids, activity codes, measurement values.
test_sub <- "./test/subject_test.txt"
test_act <- "./test/y_test.txt"
test_val <- "./test/X_test.txt"

# Training partition: same three files.
train_sub <- "./train/subject_train.txt"
train_act <- "./train/y_train.txt"
train_val <- "./train/X_train.txt"

# Lookup tables: measurement names and activity labels.
val_names <- "./features.txt"
act_labels <- "./activity_labels.txt"
# Load the two lookup tables. read.csv2 splits on ";" so each line of these
# files is read as a single field; the pieces are separated later.
df_act_labels <- read.csv2(
  act_labels,
  header = FALSE,
  stringsAsFactors = FALSE
)
df_val_names <- read.csv2(
  val_names,
  header = FALSE,
  col.names = "Measurement",
  stringsAsFactors = FALSE
)
# Tidy the raw measurement labels so they can serve as column names.
#
# Removes the punctuation characters "(", ")", ",", "-" and "\", collapses
# the duplicated "BodyBody" to "Body", and capitalises "mean"/"std".
#
# @param labels Character vector of raw measurement labels.
# @return Character vector of the same length with the substitutions applied.
clean_feature_names <- function(labels) {
  # fixed = TRUE matches each character literally, so no regex escaping is
  # needed (patterns such as "\(" are not valid R string literals, and a
  # lone backslash is not a valid regular expression).
  for (ch in c("(", ")", ",", "-", "\\")) {
    labels <- gsub(ch, "", labels, fixed = TRUE)
  }
  labels <- gsub("BodyBody", "Body", labels, fixed = TRUE)
  labels <- gsub("mean", "Mean", labels, fixed = TRUE)
  gsub("std", "Std", labels, fixed = TRUE)
}

df_val_names$Measurement <- clean_feature_names(df_val_names$Measurement)
# Load the subject ids, activity codes and measurement values for the test
# and training partitions.

# Read a one-value-per-line file into a single named column.
read_single_column <- function(path, column_name) {
  read.csv2(
    path,
    header = FALSE,
    stringsAsFactors = FALSE,
    col.names = column_name
  )
}

# Read a measurement table.
# The X_*.txt files are assumed to be whitespace-delimited numeric tables (as
# in the UCI HAR distribution) -- confirm against the data. read.csv2 splits
# on ";" and fills short rows, so it would leave each row as one unparsed
# string in the first column and NA everywhere else; read.table splits on
# whitespace and uses "." as the decimal mark by default.
read_measurements <- function(path, column_names) {
  read.table(
    path,
    header = FALSE,
    stringsAsFactors = FALSE,
    col.names = column_names
  )
}

# NOTE(review): df_val_names$Measurement has not been split at its space yet
# at this point in the script, so the column names are built from the full,
# unsplit entries (made syntactically valid by read.table).
df_test_sub <- read_single_column(test_sub, "Subject")
df_test_act <- read_single_column(test_act, "Activity")
df_test_val <- read_measurements(test_val, df_val_names$Measurement)

df_train_sub <- read_single_column(train_sub, "Subject")
df_train_act <- read_single_column(train_act, "Activity")
df_train_val <- read_measurements(train_val, df_val_names$Measurement)
# Read the activity lookup table again (same file and arguments as the
# earlier read of `act_labels`).
df_act_labels <- read.csv2(
  act_labels,
  header = FALSE,
  stringsAsFactors = FALSE
)

# Split each measurement entry at its space into Prefix / Measurement and
# keep only the Measurement part.
df_val_names <- select(
  separate(
    df_val_names,
    Measurement,
    c("Prefix", "Measurement"),
    sep = " "
  ),
  Measurement
)

# Split each activity entry at its space into Prefix / Activity.
df_act_labels <- separate(
  df_act_labels,
  V1,
  c("Prefix", "Activity"),
  sep = " "
)

# Number of activity labels.
y <- nrow(df_act_labels)
# Replace the activity codes in the test and training tables with the
# descriptive labels from df_act_labels (column 1 = code, column 2 = label).
#
# An exact lookup via match() is used instead of looping gsub() over the
# codes: gsub() treats each code as a regex and replaces substrings (a code
# "1" would also rewrite part of "12"), and `1:nrow(x)` iterates over
# c(1, 0) when the lookup table is empty.
#
# @param codes  Vector of activity codes (coerced to character).
# @param lookup Data frame whose first column holds codes and second column
#               holds the corresponding labels.
# @return Character vector; codes without a match are returned unchanged.
relabel_activity <- function(codes, lookup) {
  codes <- as.character(codes)
  idx <- match(codes, as.character(lookup[[1]]))
  found <- !is.na(idx)
  codes[found] <- as.character(lookup[[2]])[idx[found]]
  codes
}

df_test_act$Activity <- relabel_activity(df_test_act$Activity, df_act_labels)
df_train_act$Activity <- relabel_activity(df_train_act$Activity, df_act_labels)
# Build one wide table per partition and stack the two partitions.

# Column-bind subjects and activities, tag the rows with the partition name
# in a DataSet column, then append the measurement columns.
bind_partition <- function(subjects, activities, set_label, measurements) {
  combined <- cbind(subjects, activities)
  combined$DataSet <- set_label
  cbind(combined, measurements)
}

tmp <- bind_partition(df_test_sub, df_test_act, "Test", df_test_val)
tmp2 <- bind_partition(df_train_sub, df_train_act, "Train", df_train_val)

# Test rows first, then training rows.
tmp3 <- rbind(tmp, tmp2)