Impute missing values (MV) in data.

impute_mv(
  object,
  sample_id,
  method = c("knn", "rf", "mean", "median", "zero", "minimum", "bpca", "svdImpute",
    "ppca"),
  k = 10,
  rowmax = 0.5,
  colmax = 0.8,
  maxp = 1500,
  rng.seed = 362436069,
  maxiter = 10,
  ntree = 100,
  decreasing = FALSE,
  nPcs = 2,
  maxSteps = 100,
  threshold = 1e-04,
  ...
)

Arguments

object: A mass_dataset object.
sample_id: The samples whose missing values should be imputed, given as an index vector or a character vector of sample IDs (sample_id).
method: Imputation method. One of "knn", "rf" (missForest), "mean", "median", "zero", "minimum", "bpca" (BPCA), "svdImpute" (SVD) and "ppca" (PPCA). Default is "knn". The details of each method can be found in the Details section and the referenced papers.
k: See ?impute.knn
rowmax: See ?impute.knn
colmax: See ?impute.knn
maxp: See ?impute.knn
rng.seed: See ?impute.knn
maxiter: See ?missForest
ntree: See ?missForest
decreasing: See ?missForest
nPcs: See ?bpca
maxSteps: See ?bpca
threshold: See ?bpca
...: Other arguments.

Value

A new mass_dataset object.

Author

Xiaotao Shen shenxt1990@outlook.com

Examples

library(massdataset)
# Load the three demo tables used to build the dataset.
data("expression_data", "sample_info", "variable_info")

# Assemble the tables into one object, then print its summary.
object <- create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info
)
object
#> -------------------- 
#> massdataset version: 1.0.12 
#> -------------------- 
#> 1.expression_data:[ 1000 x 8 data.frame]
#> 2.sample_info:[ 8 x 4 data.frame]
#> 3.variable_info:[ 1000 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> 1 processings in total
#> create_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2022-08-07 20:07:30

# Total number of missing values in the dataset.
massdataset::get_mv_number(object)
#> [1] 3829
# Missing values counted separately for each sample.
massdataset::get_mv_number(object, by = "sample")
#> Blank_3 Blank_4    QC_1    QC_2   PS4P1   PS4P2   PS4P3   PS4P4 
#>     682     702     397     381     424     427     405     411 

# Collect the sample IDs of the QC samples and of the subject samples.
qc_id <- object %>%
  activate_mass_dataset(what = "sample_info") %>%
  filter(class == "QC") %>%
  pull(sample_id)

subject_id <- object %>%
  activate_mass_dataset(what = "sample_info") %>%
  filter(class == "Subject") %>%
  pull(sample_id)

# Keep only variables whose NA frequency is below 20% (na_freq) and below
# 50% (na_freq.1). NOTE(review): na_freq / na_freq.1 are presumably the
# columns added by the QC and subject mutate_variable_na_freq() calls, in
# that order -- confirm against variable_info.
object <- object %>%
  mutate_variable_na_freq(according_to_samples = qc_id) %>%
  mutate_variable_na_freq(according_to_samples = subject_id) %>%
  activate_mass_dataset(what = "variable_info") %>%
  filter(na_freq < 0.2 & na_freq.1 < 0.5)

# Filter the QC and subject samples by their missing-value count; Blank
# samples are not listed in `apply_to`.
object <- filter_samples(
  object = object,
  # Predicate passes when the NA count divided by nrow(object) is below 0.5.
  flist = function(x) sum(is.na(x)) / nrow(object) < 0.5,
  apply_to = c(qc_id, subject_id),
  prune = TRUE
)

# Sample IDs of the Blank samples.
blank_id <- object %>%
  activate_mass_dataset(what = "sample_info") %>%
  filter(class == "Blank") %>%
  pull(sample_id)

# Impute the Blank samples with the "zero" method.
object1 <- impute_mv(
  object = object,
  sample_id = blank_id,
  method = "zero"
)

# Preview the first rows of the Blank columns after imputation.
object1 %>%
  activate_mass_dataset(what = "expression_data") %>%
  select(dplyr::contains("Blank")) %>%
  extract_expression_data() %>%
  head()
#>               Blank_3 Blank_4
#> M136T55_2_POS       0       0
#> M79T35_POS          0       0
#> M307T548_POS        0       0
#> M349T47_POS         0       0
#> M299T359_POS        0       0
#> M344T471_POS        0       0

# Impute the subject samples with the "knn" method.
object2 <- impute_mv(
  object = object,
  sample_id = subject_id,
  method = "knn"
)

# Preview the first rows of the subject samples after imputation.
object2 %>%
  activate_mass_dataset(what = "sample_info") %>%
  filter(class == "Subject") %>%
  extract_expression_data() %>%
  head()
#>                   PS4P1     PS4P2     PS4P3     PS4P4
#> M136T55_2_POS 1494436.1 3496912.1 1959178.8 1005418.8
#> M79T35_POS    2471336.1 3333582.7 2734243.8 3361452.3
#> M307T548_POS   288590.2  137297.5  231279.2  271318.3
#> M349T47_POS   5141073.2 8424315.6 7896633.3 6441449.0
#> M299T359_POS  1401632.7 4055989.5 1577496.3 3668817.5
#> M344T471_POS   334718.8  276913.4  304611.5  450460.3