Skip to contents

Cleaning metadata can take several attempts. This function helps summarize and explore problems in the metadata, i.e. files with missing values in key columns.

Usage

check_problems(
  df,
  check = c("site_id", "aru_id", "date", "date_time", "longitude", "latitude"),
  path = FALSE,
  date = FALSE
)

Arguments

df

Data frame. Either metadata (from clean_metadata()) or GPS coordinates (from clean_gps()).

check

Character. Character vector of columns to check for missing values. Default is site_id, aru_id, date, date_time, longitude and latitude.

path

Logical. Whether to return just the file paths which have missing attributes. Default FALSE.

date

Logical. Whether to summarize output by date (as well as by site_id and aru_id). Default FALSE.

Value

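A data frame of the records with missing values in the checked columns, showing the file path alongside the checked columns that are present (e.g., aru_id, site_id, date_time and date). If date = TRUE, the problems are instead summarized by site_id, aru_id and date, presenting the minimum and maximum recording times as well as the number of files and the number of missing date/times. If path = TRUE, a character vector of the file paths with missing attributes is returned instead of a data frame.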

Examples


m <- clean_metadata(project_files = example_files, pattern_aru_id = "test")
#> Extracting ARU info...
#> Extracting Dates and Times...
#> Identified possible problems with metadata extraction:
#>  No ARU ids were successfully detected (42/42)

check_problems(m)
#> # A tibble: 42 × 5
#>    path                            aru_id site_id date_time           date      
#>    <chr>                           <chr>  <chr>   <dttm>              <date>    
#>  1 a_BARLT10962_P01_1/P01_1_20200… NA     P01_1   2020-05-02 05:00:00 2020-05-02
#>  2 a_BARLT10962_P01_1/P01_1_20200… NA     P01_1   2020-05-03 05:20:00 2020-05-03
#>  3 a_S4A01234_P02_1/P02_1_2020050… NA     P02_1   2020-05-04 05:25:00 2020-05-04
#>  4 a_S4A01234_P02_1/P02_1_2020050… NA     P02_1   2020-05-05 07:30:00 2020-05-05
#>  5 a_BARLT10962_P03_1/P03_1_20200… NA     P03_1   2020-05-06 10:00:00 2020-05-06
#>  6 a_BARLT11111_P04_1/P04_1_20200… NA     P04_1   2020-05-06 05:00:00 2020-05-06
#>  7 a_BARLT11111_P04_1/P04_1_20200… NA     P04_1   2020-05-07 03:25:00 2020-05-07
#>  8 a_BARLT10962_P05_1/P05_1_20200… NA     P05_1   2020-05-07 05:00:00 2020-05-07
#>  9 a_BARLT10962_P06_1/P06_1_20200… NA     P06_1   2020-05-09 05:20:00 2020-05-09
#> 10 a_S4A01234_P07_1/P07_1_2020050… NA     P07_1   2020-05-09 05:25:00 2020-05-09
#> # ℹ 32 more rows
check_problems(m, date = TRUE)
#> # A tibble: 14 × 7
#>    site_id aru_id date       date_time_min       date_time_max       date_time_n
#>    <chr>   <chr>  <date>     <dttm>              <dttm>                    <int>
#>  1 P01_1   NA     2020-05-02 2020-05-02 05:00:00 2020-05-02 05:00:00           3
#>  2 P01_1   NA     2020-05-03 2020-05-03 05:20:00 2020-05-03 05:20:00           3
#>  3 P02_1   NA     2020-05-04 2020-05-04 05:25:00 2020-05-04 05:25:00           3
#>  4 P02_1   NA     2020-05-05 2020-05-05 07:30:00 2020-05-05 07:30:00           3
#>  5 P03_1   NA     2020-05-06 2020-05-06 10:00:00 2020-05-06 10:00:00           3
#>  6 P04_1   NA     2020-05-06 2020-05-06 05:00:00 2020-05-06 05:00:00           3
#>  7 P04_1   NA     2020-05-07 2020-05-07 03:25:00 2020-05-07 03:25:00           3
#>  8 P05_1   NA     2020-05-07 2020-05-07 05:00:00 2020-05-07 05:00:00           3
#>  9 P06_1   NA     2020-05-09 2020-05-09 05:20:00 2020-05-09 05:20:00           3
#> 10 P07_1   NA     2020-05-09 2020-05-09 05:25:00 2020-05-09 05:25:00           3
#> 11 P07_1   NA     2020-05-10 2020-05-10 07:30:00 2020-05-10 07:30:00           3
#> 12 P08_1   NA     2020-05-11 2020-05-11 10:00:00 2020-05-11 10:00:00           3
#> 13 P09_1   NA     2020-05-11 2020-05-11 05:00:00 2020-05-11 05:00:00           3
#> 14 P10_1   NA     2020-05-11 2020-05-11 03:25:00 2020-05-11 03:25:00           3
#> # ℹ 1 more variable: date_time_n_na <int>
check_problems(m, path = TRUE)
#>  [1] "a_BARLT10962_P01_1/P01_1_20200502T050000_ARU.wav"
#>  [2] "a_BARLT10962_P01_1/P01_1_20200503T052000_ARU.wav"
#>  [3] "a_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav"  
#>  [4] "a_S4A01234_P02_1/P02_1_20200505T073000_ARU.wav"  
#>  [5] "a_BARLT10962_P03_1/P03_1_20200506T100000_ARU.wav"
#>  [6] "a_BARLT11111_P04_1/P04_1_20200506T050000_ARU.wav"
#>  [7] "a_BARLT11111_P04_1/P04_1_20200507T032500_ARU.wav"
#>  [8] "a_BARLT10962_P05_1/P05_1_20200507T050000_ARU.wav"
#>  [9] "a_BARLT10962_P06_1/P06_1_20200509T052000_ARU.wav"
#> [10] "a_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav"  
#> [11] "a_S4A01234_P07_1/P07_1_20200510T073000_ARU.wav"  
#> [12] "a_BARLT10962_P08_1/P08_1_20200511T100000_ARU.wav"
#> [13] "a_S4A02222_P09_1/P09_1_20200511T050000_ARU.wav"  
#> [14] "a_S4A03333_P10_1/P10_1_20200511T032500_ARU.wav"  
#> [15] "j_BARLT10962_P01_1/P01_1_20200502T050000_ARU.wav"
#> [16] "j_BARLT10962_P01_1/P01_1_20200503T052000_ARU.wav"
#> [17] "j_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav"  
#> [18] "j_S4A01234_P02_1/P02_1_20200505T073000_ARU.wav"  
#> [19] "j_BARLT10962_P03_1/P03_1_20200506T100000_ARU.wav"
#> [20] "j_BARLT11111_P04_1/P04_1_20200506T050000_ARU.wav"
#> [21] "j_BARLT11111_P04_1/P04_1_20200507T032500_ARU.wav"
#> [22] "j_BARLT10962_P05_1/P05_1_20200507T050000_ARU.wav"
#> [23] "j_BARLT10962_P06_1/P06_1_20200509T052000_ARU.wav"
#> [24] "j_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav"  
#> [25] "j_S4A01234_P07_1/P07_1_20200510T073000_ARU.wav"  
#> [26] "j_BARLT10962_P08_1/P08_1_20200511T100000_ARU.wav"
#> [27] "j_S4A02222_P09_1/P09_1_20200511T050000_ARU.wav"  
#> [28] "j_S4A03333_P10_1/P10_1_20200511T032500_ARU.wav"  
#> [29] "o_BARLT10962_P01_1/P01_1_20200502T050000_ARU.wav"
#> [30] "o_BARLT10962_P01_1/P01_1_20200503T052000_ARU.wav"
#> [31] "o_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav"  
#> [32] "o_S4A01234_P02_1/P02_1_20200505T073000_ARU.wav"  
#> [33] "o_BARLT10962_P03_1/P03_1_20200506T100000_ARU.wav"
#> [34] "o_BARLT11111_P04_1/P04_1_20200506T050000_ARU.wav"
#> [35] "o_BARLT11111_P04_1/P04_1_20200507T032500_ARU.wav"
#> [36] "o_BARLT10962_P05_1/P05_1_20200507T050000_ARU.wav"
#> [37] "o_BARLT10962_P06_1/P06_1_20200509T052000_ARU.wav"
#> [38] "o_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav"  
#> [39] "o_S4A01234_P07_1/P07_1_20200510T073000_ARU.wav"  
#> [40] "o_BARLT10962_P08_1/P08_1_20200511T100000_ARU.wav"
#> [41] "o_S4A02222_P09_1/P09_1_20200511T050000_ARU.wav"  
#> [42] "o_S4A03333_P10_1/P10_1_20200511T032500_ARU.wav"