
Data File Description : wide5000c series of data files for wide data
Data File Summary:  wide5000c,delimited (comma),10-30, 5000, n/a, TRUE,"Character,logical, integer, numeric, boolean, factor", "created by file:rxTestDataSetWide.R"

Where:  Unknown
Contact:  Dawn Kinsey
When:  2010/11/30
Data Description: Description of the data
 # Create Corner case data set wide5000c in a variety of formats: Mainly for import.
#
# creates 3 comma delimited textfiles, first 10 rows, second 10 rows, third 10 rows 
# create xdf file with 3 blocks and 30 rows
#
# Features of the data set:
#   wide:  5000 columns, 10-30 rows
#	funny column names
#	long factor names
#	missings throughout

#SAS .sas7bdat file is made using statTransfer
	
	#columns 1 - 6 are numeric and have really funky variable names:
		#"var1D#F"   --- > SAS file:  VAR1D_F
		#"var1.abc"	 --- > SAS file: VAR1_ABC
		#"varSpac ing"--- > SAS file: VARSPAC_ING
		#"This_is-really_a_very_long_column_name"--- > SAS file: THIS_IS_REALLY_A_VERY_LONG_COLUM
		#"var_123456"--- > SAS file: var_123456
		#"1-$%&^*!1"--- > SAS file: V______1
	#varLetters: V400 - factor, levels = letters --- > SAS file: VARLETTERS
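	#varFamilies: V200 - factor,  3 levels: c("LetsMakeThisnameVeryLongToSeeWhatHappensDog-dog", "LetsMakeThisNameVeryLongToSeeWhatHappens bird bird","LetsMakeThisNameVeryLongToSeeWhatHappens#Reptile#reptile")
	#   --- > SAS file: presumably VARFAMILIES (this note originally said VARANIMAL, which is also listed for V700 below; verify against the .sas7bdat file)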
	#varInteger: V300 - integer - random  --- > SAS file: VARINTEGER
	#varLogical: V500 - logical - from random--- > SAS file: VARLOGICAL
	#varAnimal: V700 - character, ( chicken horse coyote pig snake frog shrew mouse cat dog)
	# --- > SAS file: VARANIMAL
   #varBoolean: V1000 - boolean, 0 or 1 (integer of varLogical)  --- > SAS  VARBOOLEAN	
   
Additional Notes:
   
   Created by this R Script: located in package UnitTest directory, rxTestDataSetsWide.R
 #
# Data creation:
# Fill a 30 x 5000 table with uniform draws between 0 and 50000, turn one band
# of values into missings, then overwrite six columns with integer, logical,
# 0/1, character and letter data.
nrow <- 30
ncol <- 5000
set.seed(13)  # fixed seed, so every run regenerates the same values
data <- runif(n = nrow * ncol, min = 0, max = 50000)
# every draw strictly between 500 and 10000 becomes a missing value
is.na(data) <- which(data > 500 & data < 10000)
families <- c(
  "LetsMakeThisnameVeryLongToSeeWhatHappensDog-dog",
  "LetsMakeThisNameVeryLongToSeeWhatHappens bird bird",
  "LetsMakeThisNameVeryLongToSeeWhatHappens#Reptile#reptile"
)
Animal <- c("chicken", "horse", "coyote", "pig", "snake",
            "frog", "shrew", "mouse", "cat", "dog")
df <- as.data.frame(matrix(data, nrow = nrow, ncol = ncol))

# columns derived from the numeric draws that are already in the table
df[["V300"]] <- as.integer(df[["V300"]])
df[["V500"]] <- df[["V500"]] < 25000
df[["V1000"]] <- as.integer(df[["V500"]])  # 0/1 copy of the logical column

# freshly drawn columns; keep these three statements in this order so the
# seeded random stream is consumed the same way on every run
df[["V200"]] <- sample(families, size = nrow, replace = TRUE)
# as.integer() truncates, so the upper bound has to be 27 (not 26) for
# index 26, i.e. the letter "z", to be able to occur
df[["V400"]] <- letters[as.integer(runif(nrow, min = 1, max = 27))]
df[["V700"]] <- sample(Animal, size = nrow, replace = TRUE)

# Give the six specially typed columns descriptive names. They are addressed
# by position because they still carry their default V<n> names here.
names(df)[c(200, 300, 400, 500, 700, 1000)] <- c(
  "varFamilies", "varInteger", "varLetters",
  "varLogical", "varAnimal", "varBoolean"
)
# Convert the family and letter columns to factors whose level sets are the
# complete `families` / `letters` vectors.
df[["varFamilies"]] <- factor(df[["varFamilies"]], levels = families)
df[["varLetters"]] <- factor(df[["varLetters"]], levels = letters)

# The first six columns get deliberately awkward names (symbols, a dot, an
# embedded space, a very long name, a leading digit).
names(df)[seq_len(6)] <- c(
  "var1D#F",
  "var1.abc",
  "varSpac ing",
  "This_is-really_a_very_long_column_name",
  "var_123456",
  "1-$%&^*!1"
)

# Add missings to the factor and character variables.
# Row positions are uniform draws between 1 and nrow, rounded to whole
# numbers, so the same row can be picked more than once. The draws are made
# in the order varFamilies, varAnimal, varLetters.
familiesNaRows <- round(runif(.3 * nrow, min = 1, max = nrow))
df[["varFamilies"]][familiesNaRows] <- NA
animalNaRows <- round(runif(.1 * nrow, min = 1, max = nrow))
df[["varAnimal"]][animalNaRows] <- NA
lettersNaRows <- round(runif(.2 * nrow, min = 1, max = nrow))
df[["varLetters"]][lettersNaRows] <- NA

# Look at the relevant parts of df: the top-left 10 x 10 corner, then five of
# the specially typed columns (positions 200, 300, 400, 700 and 1000).
df[seq_len(10), seq_len(10)]
df[, c("varFamilies", "varInteger", "varLetters", "varAnimal", "varBoolean")]

##########
rootName <- "wide5000c"
# name of the .xdf output file
datafile <- paste0(rootName, "30r3b.xdf")
# common stem of the three text files, each holding 10 rows of the data frame
textFileName <- paste0(rootName, "10r")

########
# write out data:
# drop any .xdf file left over from an earlier run
if (file.exists(datafile)) {
  file.remove(datafile)
}
# last data-frame row that goes into each of the three text files
nrow.1 <- nrow / 3
nrow.2 <- 2 * nrow.1
nrow.3 <- nrow
# write three comma-delimited text files, one per consecutive run of rows;
# the files are named <textFileName>.1.txt, .2.txt and .3.txt
firstRows <- c(1, nrow.1 + 1, nrow.2 + 1)
lastRows <- c(nrow.1, nrow.2, nrow.3)
for (part in seq_along(lastRows)) {
  write.table(
    df[firstRows[part]:lastRows[part], ],
    file = paste(textFileName, part, "txt", sep = "."),
    sep = ",",
    row.names = FALSE
  )
}

# Make 3 blocks in the .xdf file: the first call creates the file, the two
# that follow are passed append="rows" for the next runs of rows.
xdfRows <- list(1:nrow.1, (nrow.1 + 1):nrow.2, (nrow.2 + 1):nrow.3)
rxDataFrameToXdf(df[xdfRows[[1]], ], datafile)
rxDataFrameToXdf(df[xdfRows[[2]], ], datafile, append = "rows")
rxDataFrameToXdf(df[xdfRows[[3]], ], datafile, append = "rows")

# check data
# Read the file-level and per-variable metadata back from the .xdf file for
# the twelve named variables and compare it with what was written.
#
# A bare all.equal() at top level only shows its result when auto-printing is
# on (it is off under a plain source()), and it never signals a mismatch: it
# just returns a description of the difference. The comparisons are therefore
# collected, printed explicitly, and a warning is raised if any of them does
# not hold.
funnyNames <- c(
  "var1D#F", "var1.abc", "varSpac ing",
  "This_is-really_a_very_long_column_name",
  "var_123456", "1-$%&^*!1"
)
fileInfo <- rxGetInfoXdf(
  datafile,
  getVarInfo = TRUE,
  numRows = 10,
  getBlockSizes = TRUE,
  varsToKeep = c(funnyNames, "varFamilies", "varAnimal", "varInteger",
                 "varLetters", "varLogical", "varBoolean")
)
xdfVarInfo <- fileInfo$varInfo
# each entry is TRUE when the comparison holds, otherwise all.equal()'s
# description of the difference
checkResults <- list(
  numBlocks = all.equal(fileInfo$numBlocks, 3),
  rowsPerBlock = all.equal(fileInfo$rowsPerBlock, c(10, 10, 10)),
  numVars = all.equal(fileInfo$numVars, 5000),
  funnyNames = all.equal(names(xdfVarInfo)[1:6], funnyNames),
  # [[ ]] is used for the variable lookups so a name is only ever matched
  # exactly ($ on a list allows partial matching)
  var1.abcType = all.equal(xdfVarInfo[["var1.abc"]]$varType, "numeric"),
  varFamiliesType = all.equal(xdfVarInfo[["varFamilies"]]$varType, "factor"),
  varAnimalType = all.equal(xdfVarInfo[["varAnimal"]]$varType, "character"),
  varLettersType = all.equal(xdfVarInfo[["varLetters"]]$varType, "factor"),
  varBooleanType = all.equal(xdfVarInfo[["varBoolean"]]$varType, "integer"),
  varIntegerType = all.equal(xdfVarInfo[["varInteger"]]$varType, "integer"),
  varLogicalType = all.equal(xdfVarInfo[["varLogical"]]$varType, "logical"),
  varFamiliesLevels = all.equal(
    xdfVarInfo[["varFamilies"]]$levels,
    c("LetsMakeThisnameVeryLongToSeeWhatHappensDog-dog",
      "LetsMakeThisNameVeryLongToSeeWhatHappens bird bird",
      "LetsMakeThisNameVeryLongToSeeWhatHappens#Reptile#reptile")
  ),
  varLettersLevels = all.equal(xdfVarInfo[["varLetters"]]$levels, letters)
)
checkPassed <- vapply(checkResults, isTRUE, logical(1))
print(checkPassed)
if (!all(checkPassed)) {
  # show what differed for each failing comparison
  print(checkResults[!checkPassed])
  warning("xdf check(s) failed: ",
          paste(names(checkResults)[!checkPassed], collapse = ", "))
}
