1.Use the code that you worked on in Homework #7 (creating fake data
sets), and re-organize it following the principles of structured
programming. Do all the work in a single chunk in your R markdown file,
just as if you were writing a single R script. Start with all of your
annotated functions, preliminary calls, and global variables. The
program body should be only a few lines of code that call the
appropriate functions and run them in the correct order. Make sure that
the output from one function serves as the input to the next. You can
either daisy-chain the functions or write separate lines of code to hold
elements in temporary variables and pass them along.
# FUNCTION: fakedata()
# Creating fake data set and data frame
# This function will create a fake data set using the means and standard deviations from a different data set
# nGroup : 3 treatments groups
# nName: Treatment names (Control, Low, High)
# nSize: sample size for each treatment (30, 27, 25)
# nMean: means for each group (68.37, 19.05, 12.84)
# nSD: Standard deviation for each group (16.58,19.05,12.84)
############################
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
fakedata <- function(nGroup = 3, nName = c("Control","Low", "High"), nSize = c(30,27,25), nMean = c(68.37,51.15,30.46),nSD = c(16.58,19.05,12.84), ID = 1:(sum(nSize))) {
Abundance <- c(rnorm(n=nSize[1],mean=nMean[1],sd=nSD[1]),
rnorm(n=nSize[2],mean=nMean[2],sd=nSD[2]),
rnorm(n=nSize[3],mean=nMean[3],sd=nSD[3]))
Treatment <- rep(nName,nSize)
ANOdata <- data.frame(ID,Treatment,Abundance)
str(ANOdata)
print(ANOdata)
}
x <- fakedata()
## 'data.frame': 82 obs. of 3 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Treatment: chr "Control" "Control" "Control" "Control" ...
## $ Abundance: num 56.9 91.3 61.7 68.9 68.4 ...
## ID Treatment Abundance
## 1 1 Control 56.883128
## 2 2 Control 91.324722
## 3 3 Control 61.749348
## 4 4 Control 68.908503
## 5 5 Control 68.440128
## 6 6 Control 59.337628
## 7 7 Control 74.235414
## 8 8 Control 69.762626
## 9 9 Control 46.748244
## 10 10 Control 98.828270
## 11 11 Control 68.560788
## 12 12 Control 75.247585
## 13 13 Control 65.039910
## 14 14 Control 114.220797
## 15 15 Control 69.942925
## 16 16 Control 38.838423
## 17 17 Control 72.738149
## 18 18 Control 45.881650
## 19 19 Control 90.678136
## 20 20 Control 73.360557
## 21 21 Control 76.518905
## 22 22 Control 45.489635
## 23 23 Control 67.319873
## 24 24 Control 43.279830
## 25 25 Control 70.863189
## 26 26 Control 52.606731
## 27 27 Control 71.330932
## 28 28 Control 68.878848
## 29 29 Control 57.594656
## 30 30 Control 63.673908
## 31 31 Low 17.461779
## 32 32 Low 62.308949
## 33 33 Low 41.352479
## 34 34 Low 43.421908
## 35 35 Low 30.848604
## 36 36 Low 56.232669
## 37 37 Low 48.649898
## 38 38 Low 80.979737
## 39 39 Low 49.592772
## 40 40 Low 15.637032
## 41 41 Low 69.094838
## 42 42 Low 79.218208
## 43 43 Low 44.496574
## 44 44 Low 61.500972
## 45 45 Low 28.455979
## 46 46 Low 68.136241
## 47 47 Low 94.591977
## 48 48 Low 2.100318
## 49 49 Low 63.197445
## 50 50 Low 15.261761
## 51 51 Low 51.040792
## 52 52 Low 48.565443
## 53 53 Low 43.242648
## 54 54 Low 49.099749
## 55 55 Low 45.567600
## 56 56 Low 59.677847
## 57 57 Low 42.013167
## 58 58 High 33.991048
## 59 59 High 29.528443
## 60 60 High 32.530579
## 61 61 High 20.000442
## 62 62 High 38.123042
## 63 63 High 26.969609
## 64 64 High 34.557414
## 65 65 High 38.062620
## 66 66 High 35.958566
## 67 67 High 16.738886
## 68 68 High 33.826473
## 69 69 High 10.748380
## 70 70 High 33.550876
## 71 71 High 16.518594
## 72 72 High 19.100707
## 73 73 High 33.245543
## 74 74 High 13.974129
## 75 75 High 31.044867
## 76 76 High 57.757076
## 77 77 High 54.453397
## 78 78 High 29.544209
## 79 79 High 20.890912
## 80 80 High 43.174056
## 81 81 High 49.617427
## 82 82 High 44.398076
fakedata()
## 'data.frame': 82 obs. of 3 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Treatment: chr "Control" "Control" "Control" "Control" ...
## $ Abundance: num 78.4 63.3 65.8 97.7 89.8 ...
## ID Treatment Abundance
## 1 1 Control 78.367601
## 2 2 Control 63.328275
## 3 3 Control 65.789432
## 4 4 Control 97.703974
## 5 5 Control 89.838625
## 6 6 Control 72.640934
## 7 7 Control 64.297317
## 8 8 Control 78.320845
## 9 9 Control 90.055584
## 10 10 Control 31.006887
## 11 11 Control 65.211701
## 12 12 Control 78.570049
## 13 13 Control 93.674537
## 14 14 Control 53.892176
## 15 15 Control 64.985079
## 16 16 Control 50.356318
## 17 17 Control 88.874795
## 18 18 Control 62.916973
## 19 19 Control 76.602948
## 20 20 Control 96.653617
## 21 21 Control 96.765898
## 22 22 Control 46.065071
## 23 23 Control 54.789801
## 24 24 Control 87.736335
## 25 25 Control 62.722215
## 26 26 Control 79.093950
## 27 27 Control 91.126063
## 28 28 Control 75.219246
## 29 29 Control 27.694752
## 30 30 Control 44.455979
## 31 31 Low 42.557740
## 32 32 Low 52.591658
## 33 33 Low 66.275251
## 34 34 Low 50.104898
## 35 35 Low 54.680279
## 36 36 Low 48.430605
## 37 37 Low 76.308859
## 38 38 Low 59.529223
## 39 39 Low 41.279327
## 40 40 Low 57.095807
## 41 41 Low 48.231497
## 42 42 Low 52.761155
## 43 43 Low 36.535946
## 44 44 Low 30.617568
## 45 45 Low 62.284886
## 46 46 Low 68.975740
## 47 47 Low 35.790610
## 48 48 Low 42.594938
## 49 49 Low 63.415523
## 50 50 Low 69.128532
## 51 51 Low 38.689659
## 52 52 Low 59.651737
## 53 53 Low 67.446216
## 54 54 Low 56.744414
## 55 55 Low 57.468031
## 56 56 Low 38.084942
## 57 57 Low 70.180245
## 58 58 High 14.888202
## 59 59 High 23.693377
## 60 60 High 29.452851
## 61 61 High 7.964364
## 62 62 High 49.757554
## 63 63 High 44.766110
## 64 64 High 21.125826
## 65 65 High 52.980897
## 66 66 High 26.100984
## 67 67 High 30.509363
## 68 68 High 50.965226
## 69 69 High 40.602383
## 70 70 High 32.635893
## 71 71 High 12.515016
## 72 72 High 42.347662
## 73 73 High 20.229096
## 74 74 High 26.288351
## 75 75 High 35.985079
## 76 76 High 29.094122
## 77 77 High 21.904780
## 78 78 High 41.466395
## 79 79 High 9.738752
## 80 80 High 14.062955
## 81 81 High 33.459143
## 82 82 High 57.755563
#############################
# FUNCTION: statsfunc()
# Statistical analysis on data : ANOVA
# Using x from fakedata
statsfunc <- function(ANOdata = x){
abundanceAOV <- aov(Abundance~Treatment,data=ANOdata)
print(abundanceAOV)
print(summary(abundanceAOV))
}
y <- statsfunc()
## Call:
## aov(formula = Abundance ~ Treatment, data = ANOdata)
##
## Terms:
## Treatment Residuals
## Sum of Squares 17500.17 23376.23
## Deg. of Freedom 2 79
##
## Residual standard error: 17.20179
## Estimated effects may be unbalanced
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 17500 8750 29.57 2.59e-10 ***
## Residuals 79 23376 296
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
y
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 17500 8750 29.57 2.59e-10 ***
## Residuals 79 23376 296
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
################################
# FUNCTION: plotfunc()
# plotting the ANOVA results
#
library(ggplot2)
plotfunc <- function(ANOdata = x){
ANOPlot <- ggplot(data=ANOdata,aes(x=Treatment,y=Abundance,fill=Treatment)) +
geom_boxplot()+
theme_bw()
print(ANOPlot)
}
z <- plotfunc()
z