% Script used for demonstrations of data analysis in MATLAB
% Simply creates some fake experimental logfiles in text format,
% reads them into Matlab,
% does descriptive statistics on data,
% reformats data into group matrices,
% and performs example statistical tests.
%
% written by christian ruff, november 2006


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% (0) Create data

clear all;

subjz   = rand(1,12)+1;
trialz  = ones(30,1);
fac1    = [0.8 1.2];
fac2    = [0.8 1.2];
condz   = ['var1','var2'];
mf      = 50;
nfac1   = 0.3;
nfac2   = 0.3;
meanz = (fac1*mf)'*(fac2*mf);

data = []; datn = [];
for i_sub = 1:length(subjz)
     dat                = trialz*[subjz(i_sub)*meanz(:)]' + (rand(30,4)-0.5)*1000;
     dat(1)             = 7500;
     dat                = [dat(:) kron(fullfact([2 2]),ones(30,1))];
     dat                = dat(randperm(size(dat,1)),:);
     datn               = 2000*ones(size(dat));
     datn(:,2:end)      = dat(:,2:end);
     datn(:,end+1)      = 1;
     dat(:,end+1)       = 2;
     dats               = zeros(size(dat,1)*2,size(dat,2));
     dats(1:2:end,:)    = datn;
     dats(2:2:end,:)    = dat;
     d{i_sub}           = [cumsum(dats(:,1)) dats];
     ds3                = {'pictures','sounds'};
     ds4                = {'living/nonliving','count_letters'};
     ds5                = {'stimulus','response'};
     fid = fopen(sprintf('subject_%d.dat',i_sub),'w');
     subj = char(70:80); subj = subj(randperm(10));
     fprintf(fid,'subject %s\n time %s\n',subj,datestr(now));
     fprintf(fid,'%s\t%s\t%s\t%s\t%s\n','time','timediff','task1','task2','event');
     for i_dat = 1:size(dat,1)
         a{1} = d{i_sub}(i_dat,1);
         a{2} = d{i_sub}(i_dat,2);
         a{3} = ds3{d{i_sub}(i_dat,3)};
         a{4} = ds4{d{i_sub}(i_dat,4)};
         a{5} = ds5{d{i_sub}(i_dat,5)};
         fprintf(fid,'%f\t%f\t%s\t%s\t%s\n',a{1},a{2},a{3},a{4},a{5});
     end
     fclose(fid);
end

format bank;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% (1) Read data into MATLAB

for i_sub = 1:length(subjz)

    % this works, but returns data as cellstring. This is perfect for
	% importing strings such as subject names etc., but bad for numbers
    import1  = importdata(['subject_' num2str(i_sub) '.dat']);
    D(i_sub).subject = import1{1};
    D(i_sub).date    = import1{2};

    % read subject's file into separate column variables
    % ignore the first three lines for this process
    [time RT var1 var2 type] = textread(['subject_' num2str(i_sub) '.dat'],'%n%n%s%s%s','headerlines',3);

    % find out the format of the variables
    whos import1 time RT var1 var2 type

    % transform cellstrings into column arrays of strings
    var1 = strvcat(var1{:});
    var2 = strvcat(var2{:});
    type = strvcat(type{:});
    whos time RT var1 var2 type

    % transform strings into numbers
    for i_lines = 1:length(var1)
        var1_num(i_lines,1) = double(var1(i_lines,1)=='p')+1;
        var2_num(i_lines,1) = double(var2(i_lines,[1])=='l')+1;
        type_num(i_lines,1) = double(type(i_lines,1)=='s')+1;
    end
    whos time RT var1_num var2_num type_num

    % assemble one big structure
    D(i_sub).data = [time RT var1_num var2_num type_num];

end
% D now contains the names, testing times, and data for each subject


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% (2) Inspect data for distribution and outliers
% The function descriptives (given at the bottom of this file) will have to be
% copied into a separate m-file called descriptives. This file will have to
% be saved in present working directory, or on the MATLAB path

for i_sub = 1:length(subjz)
    D(i_sub).desc_all  = descriptives( D(i_sub).data(:,2));
end
% D now also contains descriptive statistics for each subject


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% (3) Rearrange data for multi-subject analyses

% create big array of the trial data, as filed of the multi-subject
% structure DATA (note that the strucure D cannot be used further, since it
% is structured by single subjects
DATA.data = cat(3,D.data);


% this generally allows us to handle the data efficiently over subjects
% e.g., mean(data(:,2,:)) % mean of RT for each subject
% e.g., mean(mean(data(:,2,:))) % mean of RT acorss all subjects


% only copy the RT data and the condition indices to new array
DATA.subjs = [];
for i = 1:size(DATA.data,3)
    % where are the response trials?
    resp_trials         = find(DATA.data(:,5,i)==1);
    % get only the rows containing response trials, from columns 2:4, for subject i
    DATA.subjs(:,:,i)   = DATA.data(resp_trials,[2:4],i);
    % sort the rows by the condition variables in columns 2 and 3
    DATA.subjs(:,:,i)   = sortrows(DATA.subjs(:,:,i),[2 3]);
end
whos DATA.subjs


% create array of mean RTs, with subjects as rows and conditions as columns
% This is useful for some statistical commands (see below)
DATA.data_allsubjs = [];
DATA.data_allsubjs_matrix = [];
for i = 1:12
    for j = 1:2
        for k = 1:2
            fac = find(DATA.subjs(:,2,i)==j & DATA.subjs(:,3,i)==k);
            DATA.allsubjs_matrix(i,(j-1)*2+k) = mean(DATA.subjs(fac,1,i),1);
        end
    end
end

% create one single column vector of data, required for other analyses
DATA.allsubjs = reshape(DATA.allsubjs_matrix,48,1);


%now append matrix of experimental factors and subjectnumber
groups = [fullfact([12 2 2]) fullfact([12 4])];
DATA.allsubjs = [DATA.allsubjs groups(:,[2 3 5 4])]; % only keep one subject variable
whos DATA.allsubjs
% DATA now contains all multi-subject data matrices used for the different
% statistical test used below


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% (4) Perform example statistical tests (these obvioulsy don't all make
% sense for the data, but are peformed for demsontrative purposes)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Examples for ANOVAs


% one-way ANOVA with four factors
group      = DATA.allsubjs(:,4);
[S.anova1.p S.anova1.t S.anova1.s] = anova1(DATA.allsubjs(:,1),group);

% 2 x 2 ANOVA
group      = {DATA.allsubjs(:,[2]),DATA.allsubjs(:,[3])};
groupnames = {'var1','var2'}
[S.anovan.p S.anovan.t S.anovan.s S.anovan.t] = anovan(DATA.allsubjs(:,1),group,2,3,groupnames);

% non-parametric anova
[S.krusk.p S.krusk.t S.krusk.s] = kruskalwallis(reshape(DATA.allsubjs(:,1),12,4));

% non-parametric repeated measures anova
[S.fried.p S.fried.t S.fried.s] = friedman(reshape(DATA.allsubjs(:,1),12,4));

% within subject-ANOVAs and sphericity corrections can be found on the WEB, see Power-Point slides


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Examples for mean comparisons

% post-hoc contrasts after ANOVA
S.anova1.multcomp = multcompare(S.anova1.s);

% plot means with confidence intervals
[S.grp.mean,S.grp.sem,S.grp.counts,S.grp.names] = grpstats(DATA.allsubjs(:,1),DATA.allsubjs(:,4),0.05)

% pick two groups
g1 = find(DATA.allsubjs(:,4)==1);
g2 = find(DATA.allsubjs(:,4)==2);

% repeated measures t-test
[S.trep.h,S.trep.p,S.trep.c,S.trep.s] = ttest(DATA.allsubjs(g1,1)-DATA.allsubjs(g2,1),0.05);

% two sample t-test
[S.t.h,S.t.p,S.t.c,S.t.s] = ttest2(DATA.allsubjs(g1,1),DATA.allsubjs(g2,1),0.05);

% wilcoxon test
[S.wilc.p,S.wilc.h,S.wilc.s] = signrank(DATA.allsubjs(g1,1),DATA.allsubjs(g2,1),0.05);

% ranksum test (nonparametric comparison of two independnt groups)
[S.rank.p,S.rank.h,S.rank.s] = ranksum(DATA.allsubjs(g1,1),DATA.allsubjs(g2,1),0.05);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Examples for tests of association

% use subject number as example predictor of RTs in condition 1
g1   = find(DATA.allsubjs(:,4)==1);
pred = DATA.allsubjs(g1,5);

% correlation
[S.corr.c S.corr.p S.corr.lc S.corr.uc] = corrcoef([DATA.allsubjs(g1,1) pred])

% regression
[S.regress.B S.regress.Bint S.regress.R S.regress.Rint S.regress.S] = regress(DATA.allsubjs(g1,1),[pred ones(12,1)],0.05)

% S now contains all statistical indices (p-values etc.) derived from the
% tests performed


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Examples for use of distribution functions

% find significance of F-value with given degrees of freedom
% this is two-sided testing, so p-values have to be multiplied by 2
F1 = 4.6; df1=1; df2 = 12;
p = 2*(1-fcdf(F1,df1,df2));

% and revert the p value back to F-value
F2 = finv((1-p/2),df1,df2);


% plot normal ditributions with mean 0 and sd 1 between -3 and 3
R = [-3:0.1:3];
figure;
subplot(1,2,1); plot(R,normpdf(R,0,1));
subplot(1,2,2); plot(R,normcdf(R,0,1));

% derive random numbers from a normal distributions
rnum = randn(1,10000);
figure; histfit(rnum)

% want to close all figures? Then simply type "close all"

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function D = descriptives(data)
% calculates descriptive statistics for input data (nx1 or 1xn vector)
%
% also plots density estimates and boxplots for input data
%
% this function has to be copied into a separate m-file, and copied to a driectory on the matlab path
%
% written by christian ruff, november 2006

D.mean       = nanmean(data);
D.median     = nanmedian(data);
D.trimmean   = trimmean(data,10);
D.min        = nanmin(data);
D.max        = nanmax(data);
D.prctile    = prctile(data,[0:10:100]);
D.range      = range(data);
D.variance   = var(data);
D.stdev      = nanstd(data);
D.skewness   = skewness(data);
D.kurtosis   = kurtosis(data);
D.abnormal   = kstest(data);


figure;
subplot(2,2,1); histfit(data); title('distribution of data (blue) and normal distribution (red)');
subplot(2,2,2); normplot(data);
if lillietest(data,0.05) == 1
   title('normplot of data distribution (data are not normally distributed)');
else
    title('normplot of data distribution (data are normally distributed)');
end

subplot(2,2,3); cdfplot(data); title('cumulative distribution of data');
subplot(2,2,4); boxplot(data); title('boxplot of data');

return