Cleaning metadata can take a series of tries. This function helps summarize and explore missing metadata (problems).
Usage
check_problems(
df,
check = c("site_id", "aru_id", "date", "date_time", "longitude", "latitude"),
path = FALSE,
date = FALSE
)
Arguments
- df
Data frame. Either metadata (`clean_metadata()`) or GPS coordinates (`clean_gps()`).
- check
Character. Character vector of columns to check for missing values. Default is `site_id`, `aru_id`, `date`, `date_time`, `longitude` and `latitude`.
- path
Logical. Whether to return just the file paths which have missing attributes. Default `FALSE`.
- date
Logical. Whether to summarize output by date (as well as `site_id` and `aru_id`). Default `FALSE`.
Value
A data frame summarizing the metadata by site_id, aru_type, aru_id, and (optionally) by date. Presents the number of files, directories, and days' worth of recordings, as well as the minimum and maximum recording times. If `path = TRUE`, a character vector of the file paths with missing attributes is returned instead.
Examples
m <- clean_metadata(project_files = example_files, pattern_aru_id = "test")
#> Extracting ARU info...
#> Extracting Dates and Times...
#> Identified possible problems with metadata extraction:
#> ✖ No ARU ids were successfully detected (42/42)
check_problems(m)
#> # A tibble: 42 × 6
#> path aru_id site_id tz_offset date_time date
#> <chr> <chr> <chr> <chr> <dttm> <date>
#> 1 a_BARLT10962_P01_1/P… NA P01_1 -0400 2020-05-02 05:00:00 2020-05-02
#> 2 a_BARLT10962_P01_1/P… NA P01_1 -0400 2020-05-03 05:20:00 2020-05-03
#> 3 a_S4A01234_P02_1/P02… NA P02_1 NA 2020-05-04 05:25:00 2020-05-04
#> 4 a_S4A01234_P02_1/P02… NA P02_1 NA 2020-05-05 07:30:00 2020-05-05
#> 5 a_BARLT10962_P03_1/P… NA P03_1 -0400 2020-05-06 10:00:00 2020-05-06
#> 6 a_BARLT11111_P04_1/P… NA P04_1 -0400 2020-05-06 05:00:00 2020-05-06
#> 7 a_BARLT11111_P04_1/P… NA P04_1 -0400 2020-05-07 03:25:00 2020-05-07
#> 8 a_BARLT10962_P05_1/P… NA P05_1 -0400 2020-05-07 05:00:00 2020-05-07
#> 9 a_BARLT10962_P06_1/P… NA P06_1 -0400 2020-05-09 05:20:00 2020-05-09
#> 10 a_S4A01234_P07_1/P07… NA P07_1 NA 2020-05-09 05:25:00 2020-05-09
#> # ℹ 32 more rows
check_problems(m, date = TRUE)
#> # A tibble: 14 × 7
#> site_id aru_id date date_time_min date_time_max date_time_n
#> <chr> <chr> <date> <dttm> <dttm> <int>
#> 1 P01_1 NA 2020-05-02 2020-05-02 05:00:00 2020-05-02 05:00:00 3
#> 2 P01_1 NA 2020-05-03 2020-05-03 05:20:00 2020-05-03 05:20:00 3
#> 3 P02_1 NA 2020-05-04 2020-05-04 05:25:00 2020-05-04 05:25:00 3
#> 4 P02_1 NA 2020-05-05 2020-05-05 07:30:00 2020-05-05 07:30:00 3
#> 5 P03_1 NA 2020-05-06 2020-05-06 10:00:00 2020-05-06 10:00:00 3
#> 6 P04_1 NA 2020-05-06 2020-05-06 05:00:00 2020-05-06 05:00:00 3
#> 7 P04_1 NA 2020-05-07 2020-05-07 03:25:00 2020-05-07 03:25:00 3
#> 8 P05_1 NA 2020-05-07 2020-05-07 05:00:00 2020-05-07 05:00:00 3
#> 9 P06_1 NA 2020-05-09 2020-05-09 05:20:00 2020-05-09 05:20:00 3
#> 10 P07_1 NA 2020-05-09 2020-05-09 05:25:00 2020-05-09 05:25:00 3
#> 11 P07_1 NA 2020-05-10 2020-05-10 07:30:00 2020-05-10 07:30:00 3
#> 12 P08_1 NA 2020-05-11 2020-05-11 10:00:00 2020-05-11 10:00:00 3
#> 13 P09_1 NA 2020-05-11 2020-05-11 05:00:00 2020-05-11 05:00:00 3
#> 14 P10_1 NA 2020-05-11 2020-05-11 03:25:00 2020-05-11 03:25:00 3
#> # ℹ 1 more variable: date_time_n_na <int>
check_problems(m, path = TRUE)
#> [1] "a_BARLT10962_P01_1/P01_1_20200502T050000-0400_ARU.wav"
#> [2] "a_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav"
#> [3] "a_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav"
#> [4] "a_S4A01234_P02_1/P02_1_20200505T073000_ARU.wav"
#> [5] "a_BARLT10962_P03_1/P03_1_20200506T100000-0400_ARU.wav"
#> [6] "a_BARLT11111_P04_1/P04_1_20200506T050000-0400_ARU.wav"
#> [7] "a_BARLT11111_P04_1/P04_1_20200507T032500-0400_ARU.wav"
#> [8] "a_BARLT10962_P05_1/P05_1_20200507T050000-0400_ARU.wav"
#> [9] "a_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav"
#> [10] "a_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav"
#> [11] "a_S4A01234_P07_1/P07_1_20200510T073000_ARU.wav"
#> [12] "a_BARLT10962_P08_1/P08_1_20200511T100000-0400_ARU.wav"
#> [13] "a_S4A02222_P09_1/P09_1_20200511T050000_ARU.wav"
#> [14] "a_S4A03333_P10_1/P10_1_20200511T032500_ARU.wav"
#> [15] "j_BARLT10962_P01_1/P01_1_20200502T050000-0400_ARU.wav"
#> [16] "j_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav"
#> [17] "j_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav"
#> [18] "j_S4A01234_P02_1/P02_1_20200505T073000_ARU.wav"
#> [19] "j_BARLT10962_P03_1/P03_1_20200506T100000-0400_ARU.wav"
#> [20] "j_BARLT11111_P04_1/P04_1_20200506T050000-0400_ARU.wav"
#> [21] "j_BARLT11111_P04_1/P04_1_20200507T032500-0400_ARU.wav"
#> [22] "j_BARLT10962_P05_1/P05_1_20200507T050000-0400_ARU.wav"
#> [23] "j_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav"
#> [24] "j_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav"
#> [25] "j_S4A01234_P07_1/P07_1_20200510T073000_ARU.wav"
#> [26] "j_BARLT10962_P08_1/P08_1_20200511T100000-0400_ARU.wav"
#> [27] "j_S4A02222_P09_1/P09_1_20200511T050000_ARU.wav"
#> [28] "j_S4A03333_P10_1/P10_1_20200511T032500_ARU.wav"
#> [29] "o_BARLT10962_P01_1/P01_1_20200502T050000-0400_ARU.wav"
#> [30] "o_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav"
#> [31] "o_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav"
#> [32] "o_S4A01234_P02_1/P02_1_20200505T073000_ARU.wav"
#> [33] "o_BARLT10962_P03_1/P03_1_20200506T100000-0400_ARU.wav"
#> [34] "o_BARLT11111_P04_1/P04_1_20200506T050000-0400_ARU.wav"
#> [35] "o_BARLT11111_P04_1/P04_1_20200507T032500-0400_ARU.wav"
#> [36] "o_BARLT10962_P05_1/P05_1_20200507T050000-0400_ARU.wav"
#> [37] "o_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav"
#> [38] "o_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav"
#> [39] "o_S4A01234_P07_1/P07_1_20200510T073000_ARU.wav"
#> [40] "o_BARLT10962_P08_1/P08_1_20200511T100000-0400_ARU.wav"
#> [41] "o_S4A02222_P09_1/P09_1_20200511T050000_ARU.wav"
#> [42] "o_S4A03333_P10_1/P10_1_20200511T032500_ARU.wav"