Tutorial
Tutorial.rmd
# Tutorial
The best way to interact with the network is to instantiate an object
of the `buyer` class with `buyer$new()`.
Querying health datasets
Executing a query: To execute a query, get the API key and IP address
from the web dashboard, create a `buyer` instance, call the `query`
method on that instance with a SQL query string, and then print the
received query accuracy and results.
source("sdk.r")
api_key <- "buyer_key"
ip_addr <- "127.0.0.1"
buyer <- buyer$new(api_key, ip_addr)
# retrieve the query sub key
query_key <- buyer$get_key()
print(query_key)
# Initiate a query
sql_query <- "select count(*) as numpeople from public.condition_era_death"
result <- buyer$query(query = sql_query)
sql_query <- "select count(*) as people_per_condition from person"
result <- buyer$query(query = sql_query)
print(result)
NOTE: SQL querying rules
- allowed: GROUP BY; selecting count, sum/min/max/avg of a limited set of columns, and raw columns that are the group-by columns; any WHERE conditions
- not allowed: joins; selecting columns that aren't in the GROUP BY; arbitrary sum/min/max/avg expressions
Pretty print query history
print_query_history(buyer)
"+------------------------------------------------------------------------------------------+"
"select count(*) as numpeople from public.condition_era_death"
"list(list(\"numpeople\"), list(30297))"
"Results of this query can be expected to be within: 0.01% of the true value with 95% probability"
"+------------------------------------------------------------------------------------------+"
"select count(*) as people_per_condition from person"
"list(list(\"people_per_condition\"), list(2000000))"
"Results of this query can be expected to be within: 0.0% of the true value with 95% probability"
Get queryable table names
columns <- buyer$get_columns()
print(names(columns))
[1] "CONDITION_ERA"
[2] "CONDITION_ERA_CONDITION_OCCURRENCE"
[3] "CONDITION_ERA_DEATH"
[4] "CONDITION_ERA_DEVICE_EXPOSURE"
[5] "CONDITION_ERA_DOSE_ERA"
[6] "CONDITION_ERA_DRUG_ERA"
[7] "CONDITION_ERA_DRUG_EXPOSURE"
[8] "CONDITION_ERA_EPISODE"
...
...
[166] "SPECIMEN"
[167] "SPECIMEN_VISIT_DETAIL"
[168] "SPECIMEN_VISIT_OCCURRENCE"
[169] "VISIT_DETAIL"
[170] "VISIT_DETAIL_VISIT_OCCURRENCE"
[171] "VISIT_OCCURRENCE"
Get queryable table columns
columns <- buyer$get_columns()
print(columns$PERSON)
[1] "birth_datetime" "care_site_id"
[3] "day_of_birth" "ethnicity_concept_id"
[5] "ethnicity_source_concept_id" "ethnicity_source_value"
[7] "gender_concept_id" "gender_source_concept_id"
[9] "gender_source_value" "location_id"
[11] "month_of_birth" "person_id"
[13] "person_source_value" "provider_id"
[15] "race_concept_id" "race_source_concept_id"
[17] "race_source_value" "year_of_birth"