/* Copyright 2007 Brendan Halpin brendan.halpin@ul.ie Distribution is permitted under the terms of the GNU General Public Licence */ mata: numeric degenne_setup (data, cumdur, nstates, maxlength) { real scalar i,j,k,nobs nobs = rows(data) cumdur=J(nobs*nstates,maxlength,0) for (i = 1; i<=nobs; i++) { for (j = 1; j<=nstates; j++) { cumdur[(i-1)*nstates+j,1] = (data[i,1]==j) for (k=2; k<=maxlength; k++) { cumdur[(i-1)*nstates+j,k] = cumdur[(i-1)*nstates+j,k-1] + (data[i,k]==j) } } } } numeric degenne(seq1,seq2,cumdur,nstates,maxlength) { real matrix sxx, syy, sxy, theta sxx = J(1,nstates,1)*cumdur[((seq1-1)*nstates+1..seq1*nstates),.]:^2 syy = J(1,nstates,1)*cumdur[((seq2-1)*nstates+1..seq2*nstates),.]:^2 sxy = cumdur[((seq1-1)*nstates+1..seq1*nstates),.]:*cumdur[((seq2-1)*nstates+1..seq2*nstates),.] sxy = J(1,nstates,1)*sxy theta = acos(sxy:/sqrt(sxx:*syy)) return(theta*J(maxlength,1,1)) } /* Earlier observations dominate the standard method so I want to find a way of weighting their contribution. Empirically a weight of 1.2^i or so seem to make sense. The weight vector should really be prepared outside the function and passed to it. */ numeric degenne_w(seq1,seq2,cumdur,nstates,maxlength) { real matrix sxx, syy, sxy, weight, theta real scalar i sxx = J(1,nstates,1)*cumdur[((seq1-1)*nstates+1..seq1*nstates),.]:^2 syy = J(1,nstates,1)*cumdur[((seq2-1)*nstates+1..seq2*nstates),.]:^2 sxy = cumdur[((seq1-1)*nstates+1..seq1*nstates),.]:*cumdur[((seq2-1)*nstates+1..seq2*nstates),.] sxy = J(1,nstates,1)*sxy theta = acos(sxy:/sqrt(sxx:*syy)) weight = 1.215:^(1..maxlength) return(theta*weight') } /* Alternative to angle, Euclidean distance at each time point */ numeric cd_hamming(seq1,seq2,cumdur,nstates,maxlength) { return(sum(sqrt(((cumdur[((seq1-1)*nstates+1..seq1*nstates),.] :- cumdur[((seq2-1)*nstates+1..seq2*nstates),.]):^2)' * J(4,1,1) )) ) } function do_degenne(string vl, real scalar nstates, real scalar length, string pwdist, string degtype ) { st_view(X=.,.,(tokens(vl))) results=st_matrix(pwdist) nobs = st_nobs() degenne_setup(X,cumul=.,nstates,length) for (i=1; i