Introduction

This guide provides R code for estimating heritability and evolvability using common experimental designs. To use this guide, you must have your data saved in CSV files with the specified column names. The code is designed to be easily adapted to your own data.

Before running the code, ensure you have the necessary R packages installed. The lme4 package is required for mixed-effects models.

# Run this code once if you don't have the lme4 package installed
install.packages("lme4")
library(lme4)

Part 1: Narrow-Sense Heritability (h²) and Evolvability from Pedigrees

These methods are used in populations with known family structures. They allow for the estimation of narrow-sense heritability (h²), which considers only additive genetic variance, and the Coefficient of Additive Genetic Variation (CVA), a measure of evolvability.

A. Parent-Offspring Regression

This method estimates h² by regressing offspring phenotypes on parental phenotypes. The slope of the relationship is proportional to the heritability.

Data Format (parent_offspring.csv)

A CSV file with two columns: phenotype_parent and phenotype_offspring. For mid-parent regression, use the average of the two parents as phenotype_parent.

phenotype_parent phenotype_offspring
10.5 10.8
9.8 10.1

Function Definition

calculate_h2_parent_offspring <- function(data, regression_type = "single_parent", n_bootstraps = 1000) {
  # --- Validate Inputs ---
  if (!is.data.frame(data)) stop("Input 'data' must be a dataframe.")
  required_cols <- c("phenotype_parent", "phenotype_offspring")
  if (!all(required_cols %in% names(data))) stop(paste("Dataframe must contain:", paste(required_cols, collapse = ", ")))
  if (!(regression_type %in% c("single_parent", "mid_parent"))) stop("regression_type must be 'single_parent' or 'mid_parent'.")

  # --- Point Estimate Calculation ---
  model <- lm(phenotype_offspring ~ phenotype_parent, data = data)
  slope <- coef(model)["phenotype_parent"]
  multiplier <- ifelse(regression_type == "single_parent", 2, 1)
  h2_est <- multiplier * slope
  Vp_offspring <- var(data$phenotype_offspring)
  Va_est <- h2_est * Vp_offspring
  mean_offspring <- mean(data$phenotype_offspring)
  CVA_est <- if (Va_est < 0) NA else (100 * sqrt(Va_est)) / mean_offspring
  
  # --- Bootstrap for Confidence Intervals ---
  boot_h2 <- numeric(n_bootstraps)
  boot_CVA <- numeric(n_bootstraps)
  
  for (i in 1:n_bootstraps) {
    boot_data <- data[sample(nrow(data), replace = TRUE), ]
    boot_model <- lm(phenotype_offspring ~ phenotype_parent, data = boot_data)
    boot_slope <- coef(boot_model)["phenotype_parent"]
    
    if (is.na(boot_slope)) {
      boot_h2[i] <- NA
      boot_CVA[i] <- NA
      next
    }
    
    boot_h2[i] <- multiplier * boot_slope
    boot_Vp <- var(boot_data$phenotype_offspring)
    boot_Va <- boot_h2[i] * boot_Vp
    boot_mean <- mean(boot_data$phenotype_offspring)
    boot_CVA[i] <- if (boot_Va < 0) NA else (100 * sqrt(boot_Va)) / boot_mean
  }
  
  h2_ci <- quantile(boot_h2, c(0.025, 0.975), na.rm = TRUE)
  CVA_ci <- quantile(boot_CVA, c(0.025, 0.975), na.rm = TRUE)
  
  return(list(h2 = h2_est, h2_ci_95 = h2_ci, CVA_percent = CVA_est, CVA_ci_95 = CVA_ci))
}

Example Usage

# Assumes 'parent_offspring.csv' is in your working directory
parent_offspring_data <- read.csv("parent_offspring.csv")

# Run the function. Note the 'CVA_percent' value (evolvability) in the output.
calculate_h2_parent_offspring(parent_offspring_data, regression_type = "single_parent")
## $h2
## phenotype_parent 
##        0.6770658 
## 
## $h2_ci_95
##       2.5%      97.5% 
## 0.01472286 0.99677152 
## 
## $CVA_percent
## phenotype_parent 
##         2.416255 
## 
## $CVA_ci_95
##      2.5%     97.5% 
## 0.2517366 3.5490656

B. Half-Sib ANOVA

In a half-sib design (e.g., multiple dams mated to each sire), h² can be estimated by partitioning variance components using a linear mixed model. The variance among sires is proportional to the additive genetic variance.

Data Format (half_sib.csv)

A CSV file with columns for sire_ID, dam_ID, and phenotype.

sire_ID dam_ID phenotype
Sire1 Dam1A 10.2
Sire1 Dam1B 9.9
Sire2 Dam2A 11.5

Function Definition

calculate_h2_half_sib <- function(data, n_bootstraps = 1000) {
  # --- Validate Inputs ---
  if (!is.data.frame(data)) stop("Input 'data' must be a dataframe.")
  required_cols <- c("sire_ID", "dam_ID", "phenotype")
  if (!all(required_cols %in% names(data))) stop(paste("Dataframe must contain:", paste(required_cols, collapse = ", ")))
  
  # --- Point Estimate Calculation ---
  model <- lmer(phenotype ~ (1 | sire_ID) + (1 | dam_ID), data = data)
  variances <- as.data.frame(VarCorr(model))
  var_sire <- variances[variances$grp == "sire_ID", "vcov"]
  var_dam <- variances[variances$grp == "dam_ID", "vcov"]
  var_residual <- variances[variances$grp == "Residual", "vcov"]
  Vp <- var_sire + var_dam + var_residual
  h2_est <- (4 * var_sire) / Vp
  Va_est <- 4 * var_sire
  trait_mean <- mean(data$phenotype)
  CVA_est <- (100 * sqrt(Va_est)) / trait_mean
  
  # --- Bootstrap for Confidence Intervals ---
  boot_h2 <- numeric(n_bootstraps)
  boot_CVA <- numeric(n_bootstraps)
  
  for (i in 1:n_bootstraps) {
    boot_data <- data[sample(nrow(data), replace = TRUE), ]
    
    # Use try to handle potential model convergence issues on bootstrap samples
    boot_model <- try(lmer(phenotype ~ (1 | sire_ID) + (1 | dam_ID), data = boot_data), silent = TRUE)
    
    if (inherits(boot_model, "try-error")) {
      boot_h2[i] <- NA
      boot_CVA[i] <- NA
      next
    }
    
    boot_vars <- as.data.frame(VarCorr(boot_model))
    boot_var_sire <- boot_vars[boot_vars$grp == "sire_ID", "vcov"]
    boot_var_dam <- boot_vars[boot_vars$grp == "dam_ID", "vcov"]
    boot_var_res <- boot_vars[boot_vars$grp == "Residual", "vcov"]
    
    # Ensure all variance components were estimated
    if (length(c(boot_var_sire, boot_var_dam, boot_var_res)) < 3) {
       boot_h2[i] <- NA; boot_CVA[i] <- NA; next
    }

    boot_Vp <- boot_var_sire + boot_var_dam + boot_var_res
    boot_h2[i] <- (4 * boot_var_sire) / boot_Vp
    boot_Va <- 4 * boot_var_sire
    boot_CVA[i] <- (100 * sqrt(boot_Va)) / mean(boot_data$phenotype)
  }
  
  h2_ci <- quantile(boot_h2, c(0.025, 0.975), na.rm = TRUE)
  CVA_ci <- quantile(boot_CVA, c(0.025, 0.975), na.rm = TRUE)
  
  return(list(h2 = h2_est, h2_ci_95 = h2_ci, CVA_percent = CVA_est, CVA_ci_95 = CVA_ci))
}

Example Usage

library(lme4)

# Assumes 'half_sib.csv' is in your working directory
half_sib_data <- read.csv("half_sib.csv")

# Run the function. Note the 'CVA_percent' value (evolvability) in the output.
calculate_h2_half_sib(half_sib_data)
## $h2
## [1] 0.8390678
## 
## $h2_ci_95
##     2.5%    97.5% 
## 0.000000 3.100721 
## 
## $CVA_percent
## [1] 6.739083
## 
## $CVA_ci_95
##     2.5%    97.5% 
##  0.00000 16.40048

Part 2: Realized Heritability (h²) and Evolvability

This method calculates realized heritability and evolvability from a one-generation selection experiment using the breeder’s equation (R = h²S). This approach is most efficient under truncation selection, where the selection differential (S) can be calculated without measuring the selected parents, using only the proportion of the population selected.

A. From Measured Means (Classic Breeder’s Equation)

Data Format (No CSV file)

This method does not use a CSV file and instead uses the means of initial population, selected parents, and offspring.

Function Definition

calculate_h2_realized_from_means <- function(mean_initial, mean_selected, mean_offspring) {
  if (!is.numeric(mean_initial) || !is.numeric(mean_selected) || !is.numeric(mean_offspring)) stop("All mean values must be numeric.")
  R <- mean_offspring - mean_initial
  S <- mean_selected - mean_initial
  h2 <- if (S == 0) NA else R / S
  evolvability <- R / mean_initial
  return(list(realized_h2 = h2, mean_standardized_evolvability = evolvability, selection_differential_S = S, response_to_selection_R = R))
}

Example Usage

# Note the 'mean_standardized_evolvability' in the output.
calculate_h2_realized_from_means(
  mean_initial = 100,
  mean_selected = 125,
  mean_offspring = 110
)
## $realized_h2
## [1] 0.4
## 
## $mean_standardized_evolvability
## [1] 0.1
## 
## $selection_differential_S
## [1] 25
## 
## $response_to_selection_R
## [1] 10

Note on Confidence Intervals: Calculating confidence intervals for this function requires the standard errors and sample sizes of the input means. Since this function only accepts the point estimates of the means, it cannot compute confidence intervals. To get CIs, you would need to use a function that takes raw phenotype data as input (see Part 2B)

B. From Selection Proportion (Truncation Selection)

Data Format (selection_experiment.csv)

A CSV file with 2 columns: group (initial or offspring) and phenotype.

group phenotype
initial 10.2
initial 9.9
offspring 11.5

Function Definition

calculate_h2_realized_from_proportion <- function(data, proportion_selected, n_bootstraps = 1000) {
  # --- Validate Inputs ---
  if (!is.data.frame(data)) stop("Input 'data' must be a dataframe.")
  if (!all(c("group", "phenotype") %in% names(data))) stop("Data must contain 'group' and 'phenotype'.")
  if (!is.numeric(proportion_selected) || proportion_selected <= 0 || proportion_selected >= 1) stop("'proportion_selected' must be between 0 and 1.")
  
  # --- Point Estimate Calculation ---
  initial_pheno <- data$phenotype[data$group == "initial"]
  offspring_pheno <- data$phenotype[data$group == "offspring"]
  R <- mean(offspring_pheno) - mean(initial_pheno)
  selection_intensity <- dnorm(qnorm(1 - proportion_selected)) / proportion_selected
  S <- selection_intensity * sd(initial_pheno)
  h2_est <- R / S
  Va_est <- h2_est * var(initial_pheno)
  CVA_est <- if (Va_est < 0) NA else (100 * sqrt(Va_est)) / mean(initial_pheno)
  
  # --- Bootstrap for Confidence Intervals ---
  boot_h2 <- numeric(n_bootstraps)
  boot_CVA <- numeric(n_bootstraps)
  
  for (i in 1:n_bootstraps) {
    boot_initial <- sample(initial_pheno, replace = TRUE)
    boot_offspring <- sample(offspring_pheno, replace = TRUE)
    
    boot_R <- mean(boot_offspring) - mean(boot_initial)
    boot_S <- selection_intensity * sd(boot_initial)
    
    if (boot_S == 0) {
      boot_h2[i] <- NA; boot_CVA[i] <- NA; next
    }
    
    boot_h2[i] <- boot_R / boot_S
    boot_Va <- boot_h2[i] * var(boot_initial)
    boot_CVA[i] <- if (boot_Va < 0) NA else (100 * sqrt(boot_Va)) / mean(boot_initial)
  }
  
  h2_ci <- quantile(boot_h2, c(0.025, 0.975), na.rm = TRUE)
  CVA_ci <- quantile(boot_CVA, c(0.025, 0.975), na.rm = TRUE)
  
  return(list(realized_h2 = h2_est, h2_ci_95 = h2_ci, CVA_percent = CVA_est, CVA_ci_95 = CVA_ci))
}

Example Usage

# Assumes 'selection_experiment.csv' is in your working directory
selection_data <- read.csv("selection_experiment.csv")

# Run the function. Note the 'mean_standardized_evolvability' in the output.
calculate_h2_realized_from_proportion(selection_data, proportion_selected = 0.2)
## $realized_h2
## [1] 0.4433013
## 
## $h2_ci_95
##       2.5%      97.5% 
## -0.5655582  1.8268174 
## 
## $CVA_percent
## [1] 4.118413
## 
## $CVA_ci_95
##      2.5%     97.5% 
## 0.6238446 7.4017501

Part 3: Broad-Sense Heritability (H²) from Replicated Lines

This method is for genetically identical lines (e.g., RILs, inbred lines, clones) and estimates broad-sense heritability (H²), which includes all genetic variance (additive, dominance, and epistatic).

Data Format (replicated_lines.csv)

A CSV file with two columns: line_ID and phenotype.

line_ID phenotype
RIL_1 15.3
RIL_1 14.9
RIL_2 12.1

Function Definition

calculate_H2_replicated_lines <- function(data, n_bootstraps = 1000) {
  # --- Validate Inputs ---
  if (!is.data.frame(data)) stop("Input 'data' must be a dataframe.")
  if (!all(c("line_ID", "phenotype") %in% names(data))) stop("Data must contain 'line_ID' and 'phenotype'.")
  
  # --- Point Estimate Calculation ---
  model <- lmer(phenotype ~ (1 | line_ID), data = data)
  variances <- as.data.frame(VarCorr(model))
  var_genetic <- variances[variances$grp == "line_ID", "vcov"]
  var_residual <- variances[variances$grp == "Residual", "vcov"]
  H2_est <- var_genetic / (var_genetic + var_residual)
  
  # --- Bootstrap for Confidence Intervals ---
  boot_H2 <- numeric(n_bootstraps)
  
  for (i in 1:n_bootstraps) {
    boot_data <- data[sample(nrow(data), replace = TRUE), ]
    boot_model <- try(lmer(phenotype ~ (1 | line_ID), data = boot_data), silent = TRUE)
    
    if (inherits(boot_model, "try-error")) {
      boot_H2[i] <- NA; next
    }
    
    boot_vars <- as.data.frame(VarCorr(boot_model))
    boot_var_gen <- boot_vars[boot_vars$grp == "line_ID", "vcov"]
    boot_var_res <- boot_vars[boot_vars$grp == "Residual", "vcov"]
    
    if (length(c(boot_var_gen, boot_var_res)) < 2) {
      boot_H2[i] <- NA; next
    }
    
    boot_H2[i] <- boot_var_gen / (boot_var_gen + boot_var_res)
  }
  
  H2_ci <- quantile(boot_H2, c(0.025, 0.975), na.rm = TRUE)
  
  return(list(broad_sense_H2 = H2_est, H2_ci_95 = H2_ci))
}

Example Usage

library(lme4)

# Assumes 'replicated_lines.csv' is in your working directory
ril_data <- read.csv("replicated_lines.csv")

# Run the function for inbred lines.
calculate_H2_replicated_lines(ril_data)
## $broad_sense_H2
## [1] 0.6555087
## 
## $H2_ci_95
##      2.5%     97.5% 
## 0.0000000 0.9460147

Note on Evolvability: Evolvability (CVA) requires an estimate of the additive genetic variance (Vₐ). This method estimates total genetic variance (V₉), which includes dominance and epistatic effects. Therefore, CVA cannot be directly calculated from these results.

Part 4: Heritability of Threshold Traits

This method is for binary (0/1) or disease (present/absent) traits where a continuous phenotype is not measured. It assumes an underlying, unobserved continuous scale called ‘liability’. Heritability is estimated on this liability scale based on the prevalence of the trait in the general population and in the relatives of affected individuals.

Data Format (No CSV file)

This function does not require a data file. Instead, it requires two prevalence rates (proportions between 0 and 1) and can optionally accept sample sizes to calculate confidence intervals.

  • general_pop_prevalence: The proportion of individuals in the general population that have the trait.
  • relatives_prevalence: The proportion of individuals that have the trait among the relatives of affected individuals.
  • n_general_pop (Optional): The total number of individuals in the general population sample.
  • n_relatives (Optional): The total number of individuals in the relatives sample.

Function Definition

calculate_h2_liability_threshold <- function(general_pop_prevalence, relatives_prevalence, relationship_r = 0.5, 
                                           n_general_pop = NULL, n_relatives = NULL, n_bootstraps = 1000) {
  
  # --- Internal function to calculate h2 from prevalences ---
  .h2_from_prev <- function(p_g, p_r, r) {
    # Handle edge cases where prevalence is 0 or 1
    if (p_g <= 0 || p_g >= 1 || p_r <= 0 || p_r >= 1) return(NA)
    
    t_g <- qnorm(1 - p_g)
    t_r <- qnorm(1 - p_r)
    x_g <- dnorm(t_g) / p_g
    h2 <- (x_g * (t_g - t_r)) / (r * (1 + (x_g * (x_g - t_g))))
    return(h2)
  }
  
  # --- Point Estimate ---
  h2_est <- .h2_from_prev(general_pop_prevalence, relatives_prevalence, relationship_r)
  
  # --- Conditional Confidence Interval Calculation ---
  if (!is.null(n_general_pop) && !is.null(n_relatives)) {
    
    boot_h2 <- numeric(n_bootstraps)
    for (i in 1:n_bootstraps) {
      # Parametric bootstrap: simulate new counts based on original prevalence and N
      boot_count_g <- rbinom(1, n_general_pop, general_pop_prevalence)
      boot_count_r <- rbinom(1, n_relatives, relatives_prevalence)
      
      boot_prev_g <- boot_count_g / n_general_pop
      boot_prev_r <- boot_count_r / n_relatives
      
      boot_h2[i] <- .h2_from_prev(boot_prev_g, boot_prev_r, relationship_r)
    }
    
    h2_ci <- quantile(boot_h2, c(0.025, 0.975), na.rm = TRUE)
    
    return(list(heritability_on_liability_scale = h2_est, h2_ci_95 = h2_ci))
    
  } else {
    # Return only the point estimate if sample sizes are not provided
    return(list(heritability_on_liability_scale = h2_est, 
                note = "Provide n_general_pop and n_relatives to calculate confidence intervals."))
  }
}

Example Usage

# Example 1: No sample sizes provided (returns point estimate only)
# A disease has a 1% prevalence in the general population,
# but a 2% prevalence in the full-siblings (r=0.5) of affected individuals.
calculate_h2_liability_threshold(
  general_pop_prevalence = 0.01,
  relatives_prevalence = 0.02,
  relationship_r = 0.5
)
## $heritability_on_liability_scale
## [1] 0.7635069
## 
## $note
## [1] "Provide n_general_pop and n_relatives to calculate confidence intervals."
# Example 2: Sample sizes provided (returns point estimate and 95% CI)
calculate_h2_liability_threshold(
  general_pop_prevalence = 0.01,
  relatives_prevalence = 0.02,
  relationship_r = 0.5,
  n_general_pop = 10000,
  n_relatives = 500
)
## $heritability_on_liability_scale
## [1] 0.7635069
## 
## $h2_ci_95
##       2.5%      97.5% 
## -0.1678137  1.4290634

Note on Evolvability: This model estimates heritability on a conceptual, unobserved ‘liability’ scale. Standard evolvability metrics like the Coefficient of Additive Genetic Variation (CVA) require a measured trait mean and are therefore not applicable to this method.

Part 5: Realized Heritability from Proportions (X-QTL Method)

This method, based on the principles of liability threshold models, estimates realized heritability from an X-QTL experiment. It is useful when you don’t have individual phenotypic measurements but instead know the proportion of the parental population that was selected and the proportion of the offspring generation that exceeds the same selection threshold.

Data Format (No CSV file)

This function does not require a data file. It uses two proportions (values between 0 and 1) and can optionally accept sample sizes to calculate confidence intervals.

  • Par_pro: The proportion of parents selected from the P₀ generation (e.g., the top 5% = 0.05).
  • Off_pro: The proportion of the F₁ offspring generation that exceeds the same phenotypic threshold used to select the parents.
  • n_parents (Optional): The total number of individuals in the parent generation P₀.
  • n_offspring (Optional): The total number of individuals in the offspring generation F₁.

Function Definition

realized_heritability_XQTL <- function(Par_pro, Off_pro, n_parents = NULL, n_offspring = NULL, n_bootstraps = 1000) {

  # --- Internal function to calculate h2 from proportions ---
  .h2_from_prop <- function(p_par, p_off) {
    # Handle edge cases where proportions are 0 or 1
    if (p_par <= 0 || p_par >= 1 || p_off <= 0 || p_off >= 1) return(NA)
    
    threshold <- qnorm(1 - p_par)
    S <- dnorm(threshold) / p_par
    R <- threshold - qnorm(1 - p_off)
    h2 <- R / S
    return(h2)
  }
  
  # --- Point Estimate ---
  h2_est <- .h2_from_prop(Par_pro, Off_pro)
  
  # --- Conditional Confidence Interval Calculation ---
  if (!is.null(n_parents) && !is.null(n_offspring)) {
    
    boot_h2 <- numeric(n_bootstraps)
    for (i in 1:n_bootstraps) {
      # Parametric bootstrap: simulate new counts based on original proportions and N
      boot_count_par <- rbinom(1, n_parents, Par_pro)
      boot_count_off <- rbinom(1, n_offspring, Off_pro)
      
      boot_prop_par <- boot_count_par / n_parents
      boot_prop_off <- boot_count_off / n_offspring
      
      boot_h2[i] <- .h2_from_prop(boot_prop_par, boot_prop_off)
    }
    
    h2_ci <- quantile(boot_h2, c(0.025, 0.975), na.rm = TRUE)
    
    return(list(realized_heritability_on_liability_scale = h2_est, h2_ci_95 = h2_ci))
    
  } else {
    # Return only the point estimate if sample sizes are not provided
    return(list(realized_heritability_on_liability_scale = h2_est,
                note = "Provide n_parents and n_offspring to calculate confidence intervals."))
  }
}

Example Usage

# Example 1: No sample sizes provided (returns point estimate only). 
# An experimenter selects the top 5% of flies for a trait (e.g., crawling height).
# After they reproduce, 10% of their offspring now exceed that same height threshold.
realized_heritability_XQTL(Par_pro = 0.05, Off_pro = 0.10)
## $realized_heritability_on_liability_scale
## [1] 0.1761283
## 
## $note
## [1] "Provide n_parents and n_offspring to calculate confidence intervals."
# Example 2: Sample sizes provided (returns point estimate and 95% CI)
# In a drug exposure experiment, 3% of flies survive (the selected parents).
# Their offspring are raised and re-exposed, and now 10% of the F1 generation survives.
realized_heritability_XQTL(
  Par_pro = 0.03, 
  Off_pro = 0.10,
  n_parents = 2000,
  n_offspring = 1000
)
## $realized_heritability_on_liability_scale
## [1] 0.2642085
## 
## $h2_ci_95
##      2.5%     97.5% 
## 0.2020438 0.3233672

Note on Evolvability: Like the liability threshold model in Part 4, this method operates on a standardized, unobserved ‘liability’ scale. Because it does not use a measured trait mean, standard evolvability metrics like the Coefficient of Additive Genetic Variation (CVA) are not applicable.