我试着把它分解成多个简单的步骤,希望你能从中学到一些技巧:
# Random data
df<-c("ABCBDBDBCBABABDBCBCBDBDBCBDBACDBCCADCDBCDACDDCDACBCDACABACDACABBBCCCBDBDDCACDDACADDDDACCADACBCBDCACD")
n<-10 # Number of cuts
set.seed(1)
# Pick n random numbers between 1 and the length of string-20
nums<-sample(1:(nchar(df)-20),n,replace=TRUE)
# Make your cuts
cuts<-sapply(nums,function(x) substring(df,x,x+20-1))
# Generate some names
nams<-paste0('NAME',1:n)
# Make it into a matrix, transpose, and then recast into a vector to get alternating names and cuts.
names.and.cuts<-c(t(matrix(c(nams,cuts),ncol=2)))
# Drop a file.
write.table(names.and.cuts,'file.txt',quote=FALSE,row.names=FALSE,col.names = FALSE)
# Pick how many changes are going to be made to each cut.
changes<-sample(1:2,n,replace=2)
# Pick that number of positions to change
pos.changes<-lapply(changes,function(x) sample(1:20,x))
# Find the letter at each position.
letter.at.change.pos<-lapply(pos.changes,function(x) substring(df,x,x))
# Make a function that takes any letter, and outputs any other letter from c(A-D)
letter.map<-function(x){
# Make a list of alternate letters.
alternates<-lapply(x,setdiff,x=c('A','B','C','D'))
# Pick one of each
sapply(alternates,sample,size=1)
}
# Find another letter for each
letter.changes<-lapply(letter.at.change.pos,letter.map)
# Make a function to replace character by position
# Inefficient, but who cares.
rep.by.char<-function(str,pos,chars){
for (i in 1:length(pos)) substr(str,pos[i],pos[i])<-chars[i]
str
}
# Change every letter at pos.changes to letter.changes
mod.cuts<-mapply(rep.by.char,cuts,pos.changes,letter.changes,USE.NAMES=FALSE)
# Generate names
nams<-paste0(nams,'.1')
# Use the matrix trick to alternate names.Drop a file.
names.and.mod.cuts<-c(t(matrix(c(nams,mod.cuts),ncol=2)))
write.table(names.and.mod.cuts,'file2.txt',quote=FALSE,row.names=FALSE,col.names = FALSE)
此外,您可以像这样使用strsplit 和replace,而不是rep.by.char 函数:
mod.cuts<-mapply(function(x,y,z) paste(replace(x,y,z),collapse=''),
strsplit(cuts,''),pos.changes,letter.changes,USE.NAMES=FALSE)