若要將資料集中所有的 NA 值取代為零或其他內容,需要使用 rxDataStep 函式,將輸入資料集轉換後寫入輸出資料集。
下列範例指令碼示範如何取代 xdf 檔案(例如 "AirlineDemoSmall.xdf")中所有的 NA 值︰
# Create a data frame with missing values.
# Seed the random number generator first so the simulated columns are
# reproducible from run to run.
set.seed(17)
myDataF <- data.frame(
  x = rnorm(100),
  y = runif(100),
  z = rgamma(100, shape = 2)
)
# Row positions to blank out: rows 5, 10, ..., 100 of x and rows
# 2, 7, ..., 97 of y. Column z is left complete.
xmiss <- seq.int(from = 5, to = 100, by = 5)
ymiss <- seq.int(from = 2, to = 100, by = 5)
myDataF$x[xmiss] <- NA
myDataF$y[ymiss] <- NA
# Convert the data frame into an xdf file in the current working directory
myDataNA <- file.path(getwd(), "myDataNA.xdf")
trsfxdf <- rxDataStep(inData = myDataF, outFile = myDataNA, overwrite = TRUE)
writeLines("\n\nXdf Generated with random NA values")
# Read the first 15 rows back from the generated xdf (not from the in-memory
# data frame) to confirm the NA values were written to the file.
print(rxGetInfo(myDataNA, numRows = 15)$data) # Test output data
##
## Use from here if there is an existing xdf.
## replace myDataNA with your xdf file
##
writeLines("\n\nVariables that contains NA values (Missing Observations)")
# Summarise every variable in the xdf and keep the summary data frame; the
# surrounding parentheses make the assigned value visible so it is printed
# when the script is run at top level.
(mySum <- rxSummary(formula = ~ ., data = myDataNA)[["sDataFrame"]])
# Names of the variables that have at least one missing observation
transVars <- with(mySum, Name[MissingObs > 0])
print(transVars) # Show the detected variables
# Transform function that replaces NA values with `replaceValue`.
#
# `dataList` is a list of vectors; every element is processed the same way
# and a list of the same length and names is returned. `replaceValue` is not
# an argument: it is looked up as a free variable, so it must be visible from
# this function's environment when it runs (this script supplies it through
# the `transformObjects` argument of rxDataStep).
NAreplace <- function(dataList) {
  lapply(dataList, function(column) {
    # Overwrite only the NA positions and leave every other element untouched
    replace(column, is.na(column), replaceValue)
  })
}
#
# Path of the output xdf that will hold the data with missing values replaced
myDataRMV <- file.path(getwd(), "myDataRMV.xdf") # Replace Missing Value
# Run NAreplace over the variables listed in transVars, reading from the xdf
# referenced by myDataNA (the input was previously the undefined object
# `myData1`). `replaceValue` is handed to the transform function through
# transformObjects.
# NOTE(review): the replacement is a character string, so assigning it into
# the NA positions of a numeric vector coerces that vector to character.
# Confirm this is intended, or pass a numeric value such as 0 instead.
trsfxdf <- rxDataStep(
  inData = myDataNA,
  outFile = myDataRMV,
  transformFunc = NAreplace,
  transformVars = transVars,
  transformObjects = list(replaceValue = "REPLACED MISSING VALUE"),
  overwrite = TRUE
)
writeLines("\n\nTransformed xdf with NA replaced by Value")
# Show the first 15 rows of the transformed xdf to verify the replacement
print(rxGetInfo(myDataRMV, numRows = 15)$data) # Test output data