5 Database for Calculator Shiny

In this chapter we are going to create database, which we can use in Shiny Applications as well in results-section. In Shiny app user can add his input values for sleep apnea prevalences, condition prevalences and costs, and so get more specific results.

library(duckdb)
library(arrow)
library(dplyr)
options(scipen = 999)

## Load duckdb
con = dbConnect(duckdb::duckdb(), dbdir="data/sleep22.duckdb", read_only=TRUE)

## Load from parquet files to duckdb mem
# if(FALSE){
#   con = dbConnect(duckdb::duckdb(), dbdir=":MEMORY:", read_only=FALSE)
#   fils <- list.files("data/parquet_database", full.names = T)
#   for (fi in fils) {
#     d <- arrow::read_parquet(file = fi)
#     nm <- gsub(pattern = "data/parquet_database/", replacement = "", x = fi)
#     nm <- gsub(pattern = ".parquet", replacement = "", x = nm)
#     duckdb::duckdb_register(conn = con, name = nm, df = d)
#   }
#   dbListTables(con)
# }

5.1 Population datasets

Calculating 15-74 years old populations

## 15-74yrs Both, female and male population in wide format
library(tidyr)
tbl(con, "pop") %>% 
  filter(age_group > 14 & age_group < 75 & year_id == 2019) %>% 
  group_by(location_name, sex_name) %>% 
  summarise(pop = sum(total_population)) %>% 
  collect() %>% 
  pivot_wider(names_from = sex_name, values_from = pop) %>% 
  rename(pop_both = both,
         pop_female = female,
         pop_male = male) %>% 
  mutate(
    pop_both = round(pop_both, 0),
    pop_female = round(pop_female, 0),
    pop_male = round(pop_male, 0),
    ages = "15-74"
  ) -> pop
## `summarise()` has grouped output by "location_name". You can override using the `.groups` argument.
head(pop)
## # A tibble: 6 × 5
## # Groups:   location_name [6]
##   location_name pop_both pop_female pop_male ages 
##   <chr>            <dbl>      <dbl>    <dbl> <chr>
## 1 Albania        2085677    1042814  1042863 15-74
## 2 Armenia        2267463    1187751  1079711 15-74
## 3 Austria        6785345    3390812  3394534 15-74
## 4 Azerbaijan     7752896    3933115  3819781 15-74
## 5 Belarus        7249252    3828210  3421042 15-74
## 6 Belgium        8459697    4236476  4223220 15-74

Total populations

## Total Both, female and male population in wide format
library(tidyr)
tbl(con, "pop") %>% 
  filter(year_id == 2019) %>% 
  group_by(location_name, sex_name) %>% 
  summarise(pop = sum(total_population)) %>% 
  collect() %>% 
  pivot_wider(names_from = sex_name, values_from = pop) %>% 
  rename(pop_both = both,
         pop_female = female,
         pop_male = male) %>% 
  mutate(
    pop_both = round(pop_both, 0),
    pop_female = round(pop_female, 0),
    pop_male = round(pop_male, 0),
    ages = "total"
    ) -> popu_info
## `summarise()` has grouped output by "location_name". You can override using the `.groups` argument.
head(popu_info)
## # A tibble: 6 × 5
## # Groups:   location_name [6]
##   location_name pop_both pop_female pop_male ages 
##   <chr>            <dbl>      <dbl>    <dbl> <chr>
## 1 Albania        2720353    1357945  1362408 total
## 2 Armenia        3019674    1562196  1457478 total
## 3 Austria        8916185    4522218  4393968 total
## 4 Azerbaijan    10278674    5136802  5141872 total
## 5 Belarus        9500785    5072640  4428145 total
## 6 Belgium       11419166    5800262  5618903 total
## One dataset
pop <- pop %>% rbind(popu_info)

5.2 Prevalences from ihme dataset

Calculating prevalences percentages by joining prevalences and population information

## Get cause_id's which we can use from IHME dataset
tbl(con, "causes") %>% 
  filter(type == "prevalence") %>%
  select(cause_id) %>% 
  collect() -> causes
causes <- causes$cause_id[!is.na(causes$cause_id)]

## Get number of prevalences per location and causes from IHME (age: 15-74 and sex: both)
tbl(con, "ihme") %>% 
  filter(cause_id %in% causes & metric_name == "Number" & age_id > 7 & age_id < 20 & sex_id == 3) %>% 
  group_by(location_name, cause_id, cause_name) %>% 
  summarise(prevalence_number = sum(val)) %>% 
  arrange(location_name, cause_id) %>% 
  collect() -> prevalences_ihme
## `summarise()` has grouped output by "location_name" and "cause_id". You can override using the `.groups` argument.
## Calculate Prevalence Rates: join tables prevalences_ihme & pop
prevalences_ihme %>% 
  right_join(pop %>% filter(ages == "15-74"), by = "location_name") %>% 
  mutate(prevalence = prevalence_number / pop_both
         )  %>% 
  select(location_name, cause_id, cause_name, prevalence) -> prevalences_ihme

head(prevalences_ihme)
## # A tibble: 6 × 4
## # Groups:   location_name, cause_id [6]
##   location_name cause_id cause_name                            prevalence
##   <chr>            <dbl> <chr>                                      <dbl>
## 1 Albania            369 Maternal hypertensive disorders          0.00142
## 2 Albania            381 Neonatal preterm birth                   0.00547
## 3 Albania            493 Ischemic heart disease                   0.0376 
## 4 Albania            494 Stroke                                   0.0168 
## 5 Albania            536 Gastroesophageal reflux disease          0.137  
## 6 Albania            594 Urinary diseases and male infertility    0.0997

5.3 Mortalities from ihme

Calculating mortalities for two diseases and calculating percentages by joining mortality and population information

  • 1023 Other cardiovascular and circulatory diseases
  • 294 All causes
## Get cause_id's which base values we can use to overwrite from IHME data
tbl(con, "causes") %>% 
  filter(type == "mortality") %>% 
  select(cause_id) %>% 
  collect() -> causes
causes <- causes$cause_id[!is.na(causes$cause_id)]

## Get mortality values from ihme
tbl(con, "ihme") %>% 
  filter(cause_id %in% causes & measure_name == "Deaths" & metric_name == "Number" & age_id > 7 & age_id < 20 & sex_id == 3) %>% 
  group_by(location_name, cause_id, cause_name) %>% 
  summarise(mortality_number = sum(val)) %>% 
  arrange(location_name, cause_id) %>% 
  collect() -> mortalities_ihme
## `summarise()` has grouped output by "location_name" and "cause_id". You can override using the `.groups` argument.
## Calculate Rates: join tables mortalities_ihme & pop
mortalities_ihme %>% 
  right_join(pop %>% filter(ages == "15-74"), by = "location_name") %>% 
  mutate(mortality = mortality_number / pop_both) %>% 
  select(location_name, cause_id, cause_name, mortality) -> mortalities_ihme

head(mortalities_ihme)
## # A tibble: 6 × 4
## # Groups:   location_name, cause_id [6]
##   location_name cause_id cause_name                                    mortality
##   <chr>            <dbl> <chr>                                             <dbl>
## 1 Albania            294 All causes                                    0.00423  
## 2 Albania           1023 Other cardiovascular and circulatory diseases 0.0000757
## 3 Armenia            294 All causes                                    0.00573  
## 4 Armenia           1023 Other cardiovascular and circulatory diseases 0.0000190
## 5 Austria            294 All causes                                    0.00364  
## 6 Austria           1023 Other cardiovascular and circulatory diseases 0.0000202
## Join mortalities and prevalance data. Add name to describe value and rename 'mortality'/'prevalence' to 'value'.
prevalences_ihme$type <- "prevalence"
mortalities_ihme$type <- "mortality"
ihme <- prevalences_ihme %>%
  rbind(mortalities_ihme) %>% 
  mutate(ihme = ifelse(is.na(prevalence), mortality, prevalence)) %>% 
  select(location_name, cause_id, cause_name, type, ihme) %>% 
  arrange(location_name, type, cause_id)
head(ihme)
## # A tibble: 6 × 5
## # Groups:   location_name, cause_id [6]
##   location_name cause_id cause_name                                    type            ihme
##   <chr>            <dbl> <chr>                                         <chr>          <dbl>
## 1 Albania            294 All causes                                    mortality  0.00423  
## 2 Albania           1023 Other cardiovascular and circulatory diseases mortality  0.0000757
## 3 Albania            369 Maternal hypertensive disorders               prevalence 0.00142  
## 4 Albania            381 Neonatal preterm birth                        prevalence 0.00547  
## 5 Albania            493 Ischemic heart disease                        prevalence 0.0376   
## 6 Albania            494 Stroke                                        prevalence 0.0168
rm(list=c("mortalities_ihme", "prevalences_ihme"))

5.4 Add also base values

Base data (from excel) and country specific prevalences & mortalities

## Simple causes data / PAF and base values for each country
prevalences_base_simple <- NULL
for(cntry in unique(ihme$location_name)){
  tbl(con, "causes_simple") %>% 
    collect() %>% 
    mutate(location_name = cntry) %>%
    select("location_name", 
           "cause_id", 
           "condition", 
           "PAF", 
           "multiplier",
           "prevalence_base_italy", 
           "direct_healthcare_cost",
           "direct_nonhealthcare_cost", 
           "productivity_losses_cost") %>% 
    rbind(prevalences_base_simple) -> prevalences_base_simple
}
head(prevalences_base_simple) #1296
## # A tibble: 6 × 9
##   location_name  cause_id condition                  PAF multiplier prevalence_base_italy direct_healthcare_cost direct_nonhealthcare_cost produc…¹
##   <chr>             <dbl> <chr>                    <dbl>      <dbl>                 <dbl>                  <dbl>                     <dbl>    <dbl>
## 1 United Kingdom      294 All-cause mortality      0.075      1                     0.003                     NA                        NA       NA
## 2 United Kingdom     1023 Cardiovascular mortality 0.227      1                     0.001                     NA                        NA       NA
## 3 United Kingdom     1029 Cancer Overall           0.097      1                     0.042                   5718                      4581      119
## 4 United Kingdom      669 Diabetic retinopathy     0.207      0.124                 0.026                    307                       242      579
## 5 United Kingdom      998 Diabetic kidney disease  0.135      1                     0.015                    797                        NA       NA
## 6 United Kingdom      976 Type 2 diabetes          0.145      1                     0.068                   3866                        NA     4352
## # … with abbreviated variable name ¹​productivity_losses_cost
## Multirow causes data / PAF and base values for each country
prevalences_base <- NULL
for(cntry in unique(ihme$location_name)){
  # print(cntry)
  # tbl(con, "causes") %>% 
  tbl(con, "causes") %>% 
    collect() %>% 
    mutate(location_name = cntry) %>% 
    select("location_name", 
           "cause_id", 
           "condition", 
           "OSA_severity",
           "gender",
           "OR",
           "RR",
           "PAF", 
           "multiplier",
           "prevalence_base_italy", 
           "direct_healthcare_cost",
           "direct_nonhealthcare_cost", 
           "productivity_losses_cost") %>% 
    rbind(prevalences_base) -> prevalences_base
}
head(prevalences_base) #1296
## # A tibble: 6 × 13
##   location_name  cause_id condition                OSA_severity    gender    OR    RR   PAF multiplier prevalence_base_it…¹ direc…² direc…³ produ…⁴
##   <chr>             <dbl> <chr>                    <chr>           <chr>  <dbl> <dbl> <dbl>      <dbl>                <dbl>   <dbl>   <dbl>   <dbl>
## 1 United Kingdom      294 All-cause mortality      Severe          Both   NA     1.54 0.075      1                    0.003      NA      NA      NA
## 2 United Kingdom     1023 Cardiovascular mortality Severe          Both   NA     2.96 0.227      1                    0.001      NA      NA      NA
## 3 United Kingdom     1029 Cancer Overall           Overall         Both   NA     1.4  0.097      1                    0.042    5718    4581     119
## 4 United Kingdom      669 Diabetic retinopathy     Overall         Both    2.01 NA    0.207      0.124                0.026     307     242     579
## 5 United Kingdom      998 Diabetic kidney disease  Overall         Both    1.59 NA    0.135      1                    0.015     797      NA      NA
## 6 United Kingdom      976 Type 2 diabetes          Moderate-Severe Both   NA     1.63 0.145      1                    0.068    3866      NA    4352
## # … with abbreviated variable names ¹​prevalence_base_italy, ²​direct_healthcare_cost, ³​direct_nonhealthcare_cost, ⁴​productivity_losses_cost
# 1296 *2 2592

With base values, join new calculated prevalences/mortalities from IHME

## Join base data and new calculated prevalences
## Calculate condition prevalences from ihme data using multiplier (=ratio between armeni and ihme condition)
prevalences <- prevalences_base %>%
  left_join(ihme, by = c("location_name", "cause_id")) %>% 
  select(location_name, condition, OSA_severity, gender, cause_id, cause_name, multiplier, ihme, prevalence_base_italy, OR, RR, PAF, direct_healthcare_cost, direct_nonhealthcare_cost, productivity_losses_cost) %>% 
  right_join(
    pop %>% 
      filter(ages == "15-74") %>% 
      select(location_name, pop_both, pop_female, pop_male),
    by = "location_name") %>%  ## Add the population to all rows
  mutate(#tamaluku = ihme,
         ihme = ifelse(is.na(ihme), NA, multiplier * ihme),
         multiplier = NULL)


rm(list = c("prevalences_base"))

And same to simple table

## Join base data and new calculated prevalences
## Calculate condition prevalences from ihme data using multiplier (=ratio between armeni and ihme condition)
prevalences_simple <- prevalences_base_simple %>%
  left_join(ihme, by = c("location_name", "cause_id")) %>% 
  select(location_name, condition, cause_id, cause_name, multiplier, ihme, prevalence_base_italy,  PAF, direct_healthcare_cost, direct_nonhealthcare_cost, productivity_losses_cost) %>% 
  right_join(
    pop %>% 
      filter(ages == "15-74") %>% 
      select(location_name, pop_both, pop_female, pop_male),
    by = "location_name") %>%  ## Add the population to all rows
  mutate(ihme = ifelse(is.na(ihme), NA, multiplier * ihme),
         multiplier = NULL)


head(prevalences_simple)
## # A tibble: 6 × 13
##   location_name  condition                cause_id cause_name                    ihme preva…¹   PAF direc…² direc…³ produ…⁴ pop_b…⁵ pop_f…⁶ pop_m…⁷
##   <chr>          <chr>                       <dbl> <chr>                        <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 United Kingdom All-cause mortality           294 All causes                 4.02e-3   0.003 0.075      NA      NA      NA  4.99e7  2.50e7  2.48e7
## 2 United Kingdom Cardiovascular mortality     1023 Other cardiovascular and … 4.97e-5   0.001 0.227      NA      NA      NA  4.99e7  2.50e7  2.48e7
## 3 United Kingdom Cancer Overall               1029 Total cancers              3.80e-2   0.042 0.097    5718    4581     119  4.99e7  2.50e7  2.48e7
## 4 United Kingdom Diabetic retinopathy          669 Sense organ diseases       2.52e-2   0.026 0.207     307     242     579  4.99e7  2.50e7  2.48e7
## 5 United Kingdom Diabetic kidney disease       998 Chronic kidney disease du… 1.28e-2   0.015 0.135     797      NA      NA  4.99e7  2.50e7  2.48e7
## 6 United Kingdom Type 2 diabetes               976 Diabetes mellitus type 2   1.32e-1   0.068 0.145    3866      NA    4352  4.99e7  2.50e7  2.48e7
## # … with abbreviated variable names ¹​prevalence_base_italy, ²​direct_healthcare_cost, ³​direct_nonhealthcare_cost, ⁴​productivity_losses_cost,
## #   ⁵​pop_both, ⁶​pop_female, ⁷​pop_male
rm(list = c("prevalences_base_simple", "ihme"))

5.5 Table: prevalences and mortalities

DT::datatable(prevalences, rownames = FALSE)

5.6 OSA values from article (Excel)

### TODO left here, get prevelance data for slider
library(data.table)
## Read from source excel
osa <- readxl::read_xlsx("files/osa_tables.xlsx", sheet = "osa_armeni")
## Sleep apnea prevalences
osanew <- readxl::read_xlsx("files/osa_tables.xlsx", sheet = "osa_benjafield")
osanew$location_name
##   [1] "Afghanistan"                      "Albania"                          "Algeria"                          "Angola"                          
##   [5] "Antigua and Barbuda"              "Argentina"                        "Armenia"                          "Aruba"                           
##   [9] "Australia"                        "Austria"                          "Azerbaijan"                       "Bahamas"                         
##  [13] "Bahrain"                          "Bangladesh"                       "Barbados"                         "Belarus"                         
##  [17] "Belgium"                          "Belize"                           "Benin"                            "Bhutan"                          
##  [21] "Bolivia"                          "Bosnia and Herzegovina"           "Botswana"                         "Brazil"                          
##  [25] "Brunei"                           "Bulgaria"                         "Burkina Faso"                     "Burundi"                         
##  [29] "Cape Verde"                       "Cambodia"                         "Cameroon"                         "Canada"                          
##  [33] "Central African Republic"         "Chad"                             "Chile"                            "China"                           
##  [37] "Colombia"                         "Comoros"                          "Congo"                            "Costa Rica"                      
##  [41] "Côte d'Ivoire"                    "Croatia"                          "Cuba"                             "Curacao"                         
##  [45] "Cyprus"                           "Czech Republic"                   "Democratic Republic of the Congo" "Denmark"                         
##  [49] "Djibouti"                         "Dominican Republic"               "Ecuador"                          "Egypt"                           
##  [53] "El Salvador"                      "Equatorial Guinea"                "Eritrea"                          "Estonia"                         
##  [57] "eSwatini"                         "Ethiopia"                         "Federated States of Micronesia"   "Fiji"                            
##  [61] "Finland"                          "France"                           "French Polynesia"                 "Gabon"                           
##  [65] "Gambia"                           "Georgia"                          "Germany"                          "Ghana"                           
##  [69] "Greece"                           "Grenada"                          "Guam"                             "Guatemala"                       
##  [73] "Guinea"                           "Guinea-Bissau"                    "Guyana"                           "Haiti"                           
##  [77] "Honduras"                         "Hong Kong"                        "Hungary"                          "Iceland"                         
##  [81] "India"                            "Indonesia"                        "Iran"                             "Iraq"                            
##  [85] "Ireland"                          "Israel"                           "Italy"                            "Jamaica"                         
##  [89] "Japan"                            "Jordan"                           "Kazakhstan"                       "Kenya"                           
##  [93] "Kiribati"                         "Kuwait"                           "Kyrgyzstan"                       "Laos"                            
##  [97] "Latvia"                           "Lebanon"                          "Lesotho"                          "Liberia"                         
## [101] "Libya"                            "Lithuania"                        "Luxembourg"                       "Macao"                           
## [105] "Macedonia"                        "Madagascar"                       "Malawi"                           "Malaysia"                        
## [109] "Maldives"                         "Mali"                             "Malta"                            "Mauritania"                      
## [113] "Mauritius"                        "Mexico"                           "Moldova"                          "Mongolia"                        
## [117] "Montenegro"                       "Morocco"                          "Mozambique"                       "Namibia"                         
## [121] "Nepal"                            "Netherlands"                      "New Caledonia"                    "New Zealand"                     
## [125] "Nicaragua"                        "Niger"                            "Nigeria"                          "North Korea"                     
## [129] "Norway"                           "Oman"                             "Pakistan"                         "Panama"                          
## [133] "Papua New Guinea"                 "Paraguay"                         "Peru"                             "Philippines"                     
## [137] "Poland"                           "Portugal"                         "Puerto Rico"                      "Qatar"                           
## [141] "Romania"                          "Russia"                           "Rwanda"                           "Saint Lucia"                     
## [145] "Saint Vincent and the Grenadines" "Samoa"                            "São Tomé and Principe"            "Saudi Arabia"                    
## [149] "Senegal"                          "Serbia"                           "Seychelles"                       "Sierra Leone"                    
## [153] "Singapore"                        "Slovakia"                         "Slovenia"                         "Solomon Islands"                 
## [157] "Somalia"                          "South Africa"                     "South Korea"                      "South Sudan"                     
## [161] "Spain"                            "Sri Lanka"                        "Sudan"                            "Suriname"                        
## [165] "Sweden"                           "Switzerland"                      "Syria"                            "Taiwan"                          
## [169] "Tajikistan"                       "Tanzania"                         "Thailand"                         "Timor Leste"                     
## [173] "Togo"                             "Tonga"                            "Trinidad and Tobago"              "Tunisia"                         
## [177] "Turkey"                           "Turkmenistan"                     "Uganda"                           "Ukraine"                         
## [181] "United Arab Emirates"             "United Kingdom"                   "USA"                              "Uruguay"                         
## [185] "Uzbekistan"                       "Vanuatu"                          "Venezuela"                        "Vietnam"                         
## [189] "Virgin Islands"                   "Western Sahara"                   "Yemen"                            "Zambia"                          
## [193] "Zimbabwe"
## How many countries are needed
length(unique(prevalences$location_name))
## [1] 45
## Check which countries are not included in excel
length(osanew$location_name[osanew$location_name %in% unique(prevalences$location_name)])
## [1] 45
## 3 countries missing, check names which are missing
unique(prevalences$location_name)[!unique(prevalences$location_name) %in% osanew$location_name]
## character(0)
## these are not included # [1] "Andorra"        "Monaco"         "San Marino"  

## Filter dataset and Calculate Mild, Moderate, Severe values separately
osanew <-osanew %>% 
  filter(location_name %in% unique(prevalences$location_name)) %>% 
  mutate(
    Mild = `Mild-Moderate-Severe` - `Moderate-Severe`,
    `Male_Moderate-Severe` = 1.3823 * `Moderate-Severe`, # Gender proportion is calculated from Italy data from Armeni article data
    `Female_Moderate-Severe` = 0.6508 * `Moderate-Severe`, # Gender proportion is calculated from Italy data from Armeni article data
    `Both_Moderate-Severe` = `Moderate-Severe`, # Gender proportion is calculated from Italy data from Armeni article data
    Both_Mild = Mild,
    Male_Mild =  0.9519 * Mild, # Gender proportion is calculated from Italy data from Armeni article data
    Female_Mild = 1.0440 * Mild # Gender proportion is calculated from Italy data from Armeni article data
  )

osanew <- osanew %>% 
  mutate(
    ## TODO check these with comments
    Female_Moderate = 0.5342 * `Female_Moderate-Severe`, # Moderate proportion percentage is calculated from Italy data from Armeni article data
    Female_Severe =  0.4658 * `Female_Moderate-Severe`,   # Severe proportion percentage is calculated from Italy data from Armeni article data
    Male_Moderate = 0.4004 * `Male_Moderate-Severe`, # Moderate proportion percentage is calculated from Italy data from Armeni article data
    Male_Severe =  0.5996 * `Male_Moderate-Severe` ,
    Both_Moderate = 0.445907 * `Both_Moderate-Severe`,
    Both_Severe = 0.554093 * `Both_Moderate-Severe`
  )

## To longer format
library(tidyr)
osanew <- osanew %>%
  tidyr::pivot_longer(c(`Male_Moderate-Severe`, `Male_Moderate`, `Male_Severe`, Male_Mild, `Female_Moderate-Severe`, `Female_Moderate`, `Female_Severe`,  Female_Mild, `Both_Moderate-Severe`, `Both_Moderate`, `Both_Severe`, Both_Mild), names_to = "OSA_severity") %>%
  rename(osa_rate=value) %>%
  mutate(
    ## Get gender and OSA_severity right
    gender = sub("_.*", "", OSA_severity),
    OSA_severity = sub('.+_(.+)', '\\1', OSA_severity) 
    ) %>% 
  select(location_name, OSA_severity, gender, osa_rate)

5.7 Money index Excel

## First try HICP index, what countries are included -----
## https://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=prc_hicp_aind&lang=en
hicp <- readxl::read_xls("files/prc_hicp_aind.xls")
hicp$`GEO/TIME`[hicp$`GEO/TIME` == "Germany (until 1990 former territory of the FRG)"] <- "Germany" ## change name of the country

## Calculate correction index to all other countries
hicp %>% 
  filter(`GEO/TIME` %in% unique(prevalences$location_name)) %>% 
  rename(location_name = `GEO/TIME`) %>% 
  select(location_name, `2019`) %>% 
  mutate(index = `2019` / hicp$`2019`[hicp$`GEO/TIME` == "Italy"],
         ver = "EuroStat") %>% 
  select(location_name, index, ver) -> money_correction

## This is missing few countries so their index correction is 1 (equal to Italy)
## TODO find a way to correct these
# unique(prevalences_base$location_name[!prevalences_base$location_name %in% money_correction$location_name])
dbDisconnect(con, shutdown=TRUE)

5.8 Save data to new database

5.8.1 duckdb

# library(duckdb)
# con = dbConnect(duckdb::duckdb(), dbdir="./data/sleep22_shiny.duckdb", read_only=FALSE)
# dbWriteTable(con, "prevalences", prevalences, overwrite=TRUE)
# dbWriteTable(con, "pop", pop, overwrite=TRUE)
# dbWriteTable(con, "osa", osa, overwrite=TRUE)
# dbWriteTable(con, "osanew", osanew, overwrite=TRUE)
# dbWriteTable(con, "money_correction", money_correction, overwrite=TRUE)
# dbDisconnect(con)
# 
# con = dbConnect(duckdb::duckdb(), dbdir="./data/sleep22_shiny.duckdb", read_only=FALSE)
# if(dir.exists("data/parquet_shiny/")) fs::dir_delete("data/parquet_shiny/")
# if(!dir.exists("data/parquet_shiny/")) dir.create("data/parquet_shiny/")
# dbSendQuery(con, "EXPORT DATABASE 'data/parquet_shiny/' (FORMAT PARQUET);") # copy komento? COPY taulu TO ... FORMAT 

# dbDisconnect(conn = con)
## Copy newdb to sleep22calculator
# fils <- c(paste0(here::here(), "/data/sleep22_shiny.duckdb"), paste0(here::here(), "/data/sleep22_shiny.duckdb.wal"))
# fs::file_copy(fils, "/Users/japmiett/projects/sleep22calculator/", overwrite = T)

5.8.2 Parquet

## Testing another way to save database only to parquet files
con = dbConnect(duckdb::duckdb(), dbdir=":memory:", read_only=FALSE) ## TODO not working
## Register tbls
duckdb::duckdb_register(conn = con, name = "prevalences", df = prevalences)
duckdb::duckdb_register(conn = con, name = "prevalences_simple", df = prevalences_simple)
duckdb::duckdb_register(conn = con, name = "pop", df = pop)
duckdb::duckdb_register(conn = con, name = "osa", df = osa)
duckdb::duckdb_register(conn = con, name = "osanew", df = osanew)
duckdb::duckdb_register(conn = con, name = "money_correction", df = money_correction)
## Save to parquet
dbSendQuery(con, "COPY (SELECT * FROM prevalences) TO 'data/parquet_shiny/prevalences.parquet' (FORMAT 'parquet');")
## <duckdb_result 861b0 connection=ef300 statement='COPY (SELECT * FROM prevalences) TO 'data/parquet_shiny/prevalences.parquet' (FORMAT 'parquet');'>
dbSendQuery(con, "COPY (SELECT * FROM pop) TO 'data/parquet_shiny/pop.parquet' (FORMAT 'parquet');")
## <duckdb_result f9890 connection=ef300 statement='COPY (SELECT * FROM pop) TO 'data/parquet_shiny/pop.parquet' (FORMAT 'parquet');'>
dbSendQuery(con, "COPY (SELECT * FROM osa) TO 'data/parquet_shiny/osa.parquet' (FORMAT 'parquet');")
## <duckdb_result 7af80 connection=ef300 statement='COPY (SELECT * FROM osa) TO 'data/parquet_shiny/osa.parquet' (FORMAT 'parquet');'>
dbSendQuery(con, "COPY (SELECT * FROM osanew) TO 'data/parquet_shiny/osanew.parquet' (FORMAT 'parquet');")
## <duckdb_result 93570 connection=ef300 statement='COPY (SELECT * FROM osanew) TO 'data/parquet_shiny/osanew.parquet' (FORMAT 'parquet');'>
dbSendQuery(con, "COPY (SELECT * FROM money_correction) TO 'data/parquet_shiny/money_correction.parquet' (FORMAT 'parquet');")
## <duckdb_result 7dfe0 connection=ef300 statement='COPY (SELECT * FROM money_correction) TO 'data/parquet_shiny/money_correction.parquet' (FORMAT 'parquet');'>
dbSendQuery(con, "COPY (SELECT * FROM prevalences_simple) TO 'data/parquet_shiny/prevalences_simple.parquet' (FORMAT 'parquet');")
## <duckdb_result 32150 connection=ef300 statement='COPY (SELECT * FROM prevalences_simple) TO 'data/parquet_shiny/prevalences_simple.parquet' (FORMAT 'parquet');'>
dbDisconnect(conn = con)
## Copy to sleep22calculator
fils <- list.files("data/parquet_shiny/", full.names = T) 
fs::file_copy(fils, "/Users/japmiett/projects/sleep22calculator/data/", overwrite = T)