# butina.R # # Example: # source("butina.R") # nn <- findnn(r) # butina(r, nn, output="butina.out") # # Reason for division into two functions: # findnn is slow enough, like 5 minutes. # This is a pain during the development of butina. # Final version will include findnn at start of butina. findnn <- function(r, cutoff=0.8) { mylen <- length(r[1,]) nn <- rep(0,mylen) for (i in 1:mylen) { # Count up the nearest neighbours # (subtract 1 because it includes itself) nn[i] <- sum(r[,i] > cutoff) - 1 } nn } butina <- function(r, nn, cutoff=0.8, output="butina.out") { sorted <- sort(nn,decreasing=TRUE,index.return=TRUE)$ix mylen <- length(r[1,]) seen <- rep(FALSE,mylen) clusterno <- 0 sink(output) i <- 1 while (i<=mylen) { clusterno <- clusterno+1 # After loop, i will either be >mylen or seen[sorted[i]]==FALSE while (i<=mylen & seen[sorted[i]]) { i <- i+1 } if (i<=mylen) { cat("Cluster no. ",clusterno,"\n") ans <- (r[sorted[i],] > cutoff) & !seen print(which(ans)) seen[ans] <- TRUE } i <- i+1 # Since we have just set seen[sorted[i]]==TRUE } sink(NULL) }