Merge pull request #4 from ercrema/intcal20version

Updated version of the repository with all analyses based on IntCal20 and Marine20 Calibration Curves
ercrema · Dec 6, 2020 · a960854 · a960854
2 parents 1eab41e + e2350bb
commit a960854
Show file tree

Hide file tree

Showing 48 changed files with 878,660 additions and 834,790 deletions.
diff --git a/R/mcsim.R b/R/mcsim.R
@@ -5,12 +5,42 @@
 #'@param posterior A posterior class object created using \code{convertToArray()}.
 #'@param weights How the probability weight should be assigned to each event in case of multiple phases. One between 'equal', 'variance', or 'custom'. Default is 'variance', and 'custom' requires a list with a vector of weights to each of the multiple phases considered (argument customWeight).
 #'@example
-#' load("/Users/enryu/github/ENCOUNTER_github/encounter_jomonphases/results/model0_oxcal.RData")
-#' df=data.frame(StartPhase=c("B1","C1","C234","Z4","C234","C234"),EndPhase=c("B2","C1","C56","Z6","C234","C56"),stringsAsFactors = FALSE)
-#' posterior.a=convertToArray(model0a,"gaussian")
-#' result=sampler(df,nsim=100,posterior=posterior.a,weights="equal")
+#'source("./R/mcsim.R")
+#' source("./R/rdirichlet.R")
+#' 
+#' load("R_images/posteriorSamples.RData")
+#' 
+#' 
+#' nsim=1000
+#' # A Sample of three PitHouse with Start/End ceramic phase
+#' df=data.frame(StartPhase=c("B1","C1","Z4",),EndPhase=c("B2","C1","Z6"))
+#' 
+#' # Hypothetical Count Data 
+#' B1=90
+#' B2=67
+#' Z4=120
+#' Z5=1
+#' Z6=2
+#' 
+#' df=data.frame(StartPhase=c("B1","C1","Z4","B1"),EndPhase=c("B2","C1","Z6","B2"))
+#' weightList=vector('list',length(4))
+#' weightList[[1]]=rdirichlet(n=nsim,alpha=c(B1,B2)+1)
+#' weightList[[2]]=1
+#' weightList[[3]]=rdirichlet(n=nsim,alpha=c(Z4,Z5,Z6)+1)
+#' weightList[[4]]=rdirichlet(n=nsim,alpha=c(B1,B2)+1)
+#' 
+#' eq.prior=simTrapezoid=mcsim(df,nsim=nsim,posterior=postTrapezoid,weights="equal")
+#' var.prior=simTrapezoid=mcsim(df,nsim=nsim,posterior=postTrapezoid,weights="variance")
+#' empiricalbayes.prior=simTrapezoid=mcsim(df,nsim=nsim,posterior=postTrapezoid,weights="custom",weightList=weightList)
+#' 
+#' par(mfrow=c(3,1))
+#' {
+#'   hist(eq.prior[3,],main='Pithouse 3 (Z4-Z6), using equal prior',breaks=seq(5000,7000,50),xlim=c(7000,5000))
+#'   hist(var.prior[3,],main='Pithouse 3 Z4-Z6), using variance (duration) prior',breaks=seq(5000,7000,50),xlim=c(7000,5000))
+#'   hist(empiricalbayes.prior[3,],main='Pithouse 3 (Z4-Z6), using empricial bayes prior',breaks=seq(5000,7000,50),xlim=c(7000,5000))
+#' }
 
-mcsim = function(df,nsim,posterior,weights=c("equal","variance"),verbose=TRUE)
+mcsim = function(df,nsim,posterior,weights=c("equal","variance",'custom'),weightList=NULL,verbose=TRUE)
 {
   require(trapezoid)
   require(rcarbon)
@@ -46,19 +76,33 @@ mcsim = function(df,nsim,posterior,weights=c("equal","variance"),verbose=TRUE)
 
   for (p in 1:nrow(permUnique))
   {
-  #If single phase just a random draw from the probability distribution
-      if(permUnique[p,1]==permUnique[p,2])
-      {
-        sampled.phases[permIndex[[p]]]=as.character(permUnique[p,1])
+    #If single phase just a random draw from the probability distribution
+    if(permUnique[p,1]==permUnique[p,2])
+    {
+      sampled.phases[permIndex[[p]]]=as.character(permUnique[p,1])
+    }
+
+    #If multiple phase assign randomly with probabilit prob
+    if (permUnique[p,1]!=permUnique[p,2])
+    {
+
+      timespan = which(as.character(phases)==as.character(permUnique[p,1])):which(as.character(phases)==as.character(permUnique[p,2]))
+      nsamples = length(permIndex[[p]])
+
+      if (weights%in%c('equal','variance'))
+      { 
+        timespan.weights = v[timespan]
+        prob=timespan.weights/(sum(timespan.weights))
       }
-      if (permUnique[p,1]!=permUnique[p,2])
+      if (weights=='custom')
       {
-        timespan = which(as.character(phases)==as.character(permUnique[p,1])):which(as.character(phases)==as.character(permUnique[p,2]))
-        timespan.weights = v[timespan]
-        nsamples = length(permIndex[[p]])
-        sampled.phases[permIndex[[p]]] = as.character(phases[sample(timespan,nsamples,prob=timespan.weights/(sum(timespan.weights)),replace=TRUE)])
+        tmp.index=permIndex[[p]]
+        if (length(tmp.index)>1){tmp.index=sample(tmp.index,size=1)}
+        prob = weightList[[tmp.index]][sample(1:nsim,size=1),]
       }
+      sampled.phases[permIndex[[p]]] = as.character(phases[sample(timespan,nsamples,prob=prob,replace=TRUE)])
     }
+  }
 
   # Identify unique phases
   unique.phases = unique(sampled.phases)

diff --git a/R/rdirichlet.R b/R/rdirichlet.R
@@ -0,0 +1,7 @@
+rdirichlet<-function (n, alpha) 
+{
+  l <- length(alpha)
+  x <- matrix(rgamma(l * n, alpha), ncol = l, byrow = TRUE)
+  sm <- x %*% rep(1, l)
+  x/as.vector(sm)
+}
diff --git a/README.md b/README.md
@@ -2,11 +2,11 @@
 
 # A multi-proxy inference of Jōmon population dynamics using Bayesian phase models, residential data, and summed probability distribution of <sup>14</sup>C dates: source code, data, and scripts
 
-This repository contains all data and scripts required to fully reproduce all analyses presented in the following paper: 
-
+This repository contains an updated version of the data and scripts used in the following paper.
 
 Crema, E.R., Kobayashi, K., 2020. A multi-proxy inference of Jōmon population dynamics using bayesian phase models, residential data, and summed probability distribution of <sup>14</sup>C dates. Journal of Archaeological Science 117, 105136. DOI: https://doi.org/10.1016/j.jas.2020.105136
 
+The original repository contained scripts based on the IntCal13 and Marine13 calibration curves and can be accessed [here](https://github.com/ercrema/jomonPhasesAndPopulation/tree/v.2.1). Analyses and results contained in this repository are based on the IntCal20 and Marine20 curves and contains. This version contains also an updated version of the function `mcsim()` as well as a new rmarkdown file with a [short Rmarkdown tutorial](./calibrating_jomon_ceramic_phases.Rmd) on how to use the function with different datasets.
 
 The main workflow is recorded in the [log.R](./log.R) file and outputs are stored as R image files located in the [R_images](./R_images) directory.   
 
@@ -16,7 +16,7 @@ All raw data used in the paper can be found in the [data](./data) directory. The
 
 ## Bayesian ceramic phase modelling
 
-Bayesian modelling have been carried out using [OxCal](https://c14.arch.ox.ac.uk/oxcal/OxCal.html), with the preparation of OxCal scripts done in R. The analyses was conducted in three stages. Firstly, potential outliers were detected within sets of dates associated with the same event (e.g. different organic residues from the same vessel). This was achievied by using the `outlierExcluder()` function (stored [here](./R/outlierAnalysis.R)) which internally calls OxCal from R using the [oxcAAR](https://CRAN.R-project.org/package=oxcAAR) package. The function eliminates potential outliers from the initial set of radiocarbon dates and result of this routine was stored in the R image file [c14data.RData](./R_images/c14data.RData). 
+Bayesian modelling have been carried out using [OxCal v4.4](https://c14.arch.ox.ac.uk/oxcal/OxCal.html), with the preparation of OxCal scripts done in R. The analyses was conducted in three stages. Firstly, potential outliers were detected within sets of dates associated with the same event (e.g. different organic residues from the same vessel). This was achievied by using the `outlierExcluder()` function (stored [here](./R/outlierAnalysis.R)) which internally calls OxCal from R using the [oxcAAR](https://CRAN.R-project.org/package=oxcAAR) package. The function eliminates potential outliers from the initial set of radiocarbon dates and result of this routine was stored in the R image file [c14data.RData](./R_images/c14data.RData). 
 
 The second step of analysis consisted in creating OxCal scripts for different probability distributions emulating putative _within-phase uncertainty_. This was achieved by using the `oxcalScriptGen()` function (located [in this file](./R/oxcalScriptCreator.R). The output scripts (stored in the directory [./oxcal/oxcalscripts](./oxcal/oxcalscripts)) were then loaded into OxCal for analyses. The results of the Bayesian analyses are stored in the directory [./oxcal/results](./oxcal/results), and include the .csv storing the posterior samples and a JavaScript file (.js) containing key statistics such as individual and overall agreement indices, read in R using the `oxcalReadjs()` function (see source code [here](./R/oxcalReadjs.R)). 
 
@@ -132,7 +132,7 @@ attached base packages:
 [1] stats     graphics  grDevices utils     methods   base     
 
 other attached packages:
-[1] readr_1.3.1     trapezoid_2.0-0 TTR_0.23-5      rcarbon_1.3.0  
+[1] readr_1.3.1     trapezoid_2.0-0 TTR_0.23-5      rcarbon_1.4.1  
 [5] oxcAAR_1.0.0    dplyr_0.8.3     magrittr_1.5    nvimcom_0.9-82 
 
 loaded via a namespace (and not attached):

diff --git a/R_images/comp.RData b/R_images/comp.RData
diff --git a/R_images/posteriorSamples.RData b/R_images/posteriorSamples.RData
diff --git a/R_images/simdatesPithouses.RData b/R_images/simdatesPithouses.RData
diff --git a/R_images/spdC14.RData b/R_images/spdC14.RData
diff --git a/R_images/spdRes.RData b/R_images/spdRes.RData