* University of Otago, Wellington;

* Impact of earthquake housing damage on cardiovascular disease after the Canterbury earthquakes 2010-2015;

* Research was funded by Healthier Lives National Science Challenge, and is affiliated with the Virtual Health Information Network vhin.co.nz.

* By Andrea Teng, March 2017, using Sep 2016 refresh;

* This program links EQC earthquake residential building damage to a population resident in NZ on the 4th Sep 2010, and then examines their hospitalisation data for cardiovascular admissions, myocardial infarction admissions and cardiovascular deaths.

* WARNING this code is not very efficient to run, and is best run overnight

* This code is an example of how the IDI can be used to construct a population dataset. Please note that without access to the EQC data formats used in this research, the code will not be able to run cleanly. The code was designed to run when the Sep 2016 refresh was current, but would need to be updated to run cleanly with in more recent refreshes;

* Outline of this SAS program;

*1. Produce a NZ estimated residential population and then add demographic data such as ethnicity and income;

*2. Add cardiovascular disease hospital admission and mortality data to the population;

*3. Add meshblock address information, level of deprivation index and EQC earthquake housing damage;

*4. In the combined dataset, create cardiovascular event flags, and person-time follow-up post-earthquake;

*5. See attached files for age standardised cardiovascular disease event rates by level of housing area damage and for the Poisson regression analysis of the association between area housing damage and cardiovascular events;

* Setting up file path names;

%let basepath=; *insert pathname for datalab project directory;

libname dlab "&basepath\EarthquakeCVD";

libname extra "&basepath\EarthquakeCVD\Extra";

libname chk "&basepath\Checking\Andrea";

libname moh ODBC dsn=idi_clean_archive_srvprd schema=moh_clean;

libname data ODBC dsn=idi_clean_archive_srvprd schema=data;

libname sandmoh ODBC dsn=idi_sandpit_srvprd schema="clean_read_MOH_Health_Tracker";

libname sand ODBC dsn=idi_sandpit_srvprd schema="DL-MAA2015-53";

libname metadata ODBC dsn=idi_metadata_srvprd schema=clean_read_CLASSIFICATIONS;

%include "&basepath\EarthquakeCVD\EQCtravelling.sas"; *NOTE: these are meshblock formats for housing damage that have been derrived from EQC data and have been approved for use on this project via a MoU between EQC and UOW (but not more widely);

ods graphics/width=20cm height=13cm Labelmax=50;

*formats used in this program;

proc format;

value fdhbdom

013='';

value fssex

1='Male'

2='Female'

9='Missing';

value fdamcat

0='Rest of NZ'

1='$0-999'

2='$1,000-9,999'

3='$10,000-49,999'

4='$50,000 + '

9='Missing region';

value fdamcatot

0,7='Rest of NZ'

1='$0-4,999'

2='$5,000-19,999'

3='$20,000-49,999'

4='$50,000 + '

8='Missing damage'

9='Missing region';

value fdamcatots

0='Rest of NZ'

-1,1='$0-4,999'

-2,2='$5,000-19,999'

-3,3='$20,000-49,999'

-4,4='$50,000 +'

-8,-9,8,9='Missing';

value f2damcatots

0='Rest of NZ'

-1,1='$0-24,999'

-2,2='$25,000-49,999'

-3,3='$50,000-99,999'

-4,4='$100,000 +'

-8,-9,8,9='Missing';

value fpdamcatots

0='Rest of NZ'

-1,1='< 3%'

-2,2='3-5%'

-3,3='6-14%'

-4,4='15% +'

-8,-9,8,9='Missing';

value fcant

0= 'Rest of NZ'

1= 'Canterbury'

.= 'Missing';

value feth

1='Maori'

2='Pacific'

3='Asian'

4='European/Other';

value fdepq

-1,1='Least deprived quintile'

-2,2='Second least deprived quintile'

-3,3='Middle deprivation quintile'

-4,4='Second most deprived quintile'

-5,5='Most deprived quintile';

*these formats are from June Atkinsons code, 20th April 2016;

invalue $iAnyCVD

'I00'-'I00XX', 'I010'-'I012X', 'I018'-'I020X', 'I029'-'I029X',

'I050'-'I052X', 'I058'-'I062X', 'I068'-'I072X', 'I078'-'I083X',

'I088'-'I092X', 'I098'-'I099X', 'I10'-'I10XX', 'I110'-'I110X',

'I119'-'I120X', 'I129'-'I132X', 'I139'-'I139X', 'I150'-'I152X',

'I158'-'I159X', 'I200'-'I201X', 'I208'-'I214X', 'I219'-'I221X',

'I228'-'I236X', 'I238'-'I238X', 'I240'-'I241X', 'I248'-'I250X',

'I2510'-'I2513','I252'-'I256X', 'I258'-'I260X', 'I268'-'I272X',

'I278'-'I281X', 'I288'-'I289X', 'I300'-'I301X', 'I308'-'I313X',

'I318'-'I321X', 'I328'-'I328X', 'I330'-'I330X', 'I339'-'I342X',

'I348'-'I352X', 'I358'-'I362X', 'I368'-'I372X', 'I378'-'I379X',

'I38'-'I38XX', 'I390'-'I394X', 'I398'-'I398X', 'I400'-'I401X',

'I408'-'I412X', 'I418'-'I418X', 'I420'-'I432X', 'I438'-'I438X',

'I440'-'I447X', 'I450'-'I456X', 'I458'-'I461X', 'I469'-'I472X',

'I479'-'I479X', 'I48'-'I48XX', 'I490'-'I495X', 'I498'-'I501X',

'I509'-'I521X', 'I528'-'I528X', 'I600'-'I616X', 'I618'-'I621X',

'I629'-'I636X', 'I638'-'I639X', 'I64'-'I64XX', 'I650'-'I653X',

'I658'-'I664X', 'I668'-'I682X', 'I688'-'I688X', 'I690'-'I694X',

'I698'-'I698X', 'I700'-'I701X', 'I7020'-'I7024','I708'-'I709X',

'I7100'-'I7103','I711'-'I716X', 'I718'-'I724X', 'I728'-'I729X',

'I738'-'I739X', 'I790'-'I790X', 'I980'-'I981X' ='ACVD' /*AnyCVD*/

other='???';

/*Multiple CVD codes (all ICD10 4 characters)*/

invalue $i4CVD

'G453','G460'-'G465','G467','G468','I650'-'I653','I658'-'I664',

'I668'-'I682','I688','I690'-'I694','I698'='CERE' /*CEREBVAS ICD10 (4 characters)*/

'I110','I130','I132','I420'-'I422','I426'-'I432','I438','I500',

'I501','I509'='HF__' /*HF (Heart Failure) ICD10 (4 characters)*/

'I600'-'I616','I618'-'I621','I629'='HStr' /*HStroke ICD10 (4 characters)*/

'I630'-'I636','I638','I639'='IStr' /*IStroke ICD10 (4 characters)*/

'I210'-'I214','I219','I220','I221','I228','I229'='MI__' /*MI ICD10 (4 characters)*/

'I201','I208','I209','I248','I249','I252'-'I256','I258','I259'='OAng' /*OthAng ICD10 (4 characters)*/

'I64'-'I64X'='OStr' /*OthStroke ICD10 (4 characters)*/

'G450'-'G452','G454','G458'-'G459'='TIA_' /*TIA ICD10 (4 characters)*/

'I200'='UA__' /*UA ICD10 (4 characters)*/

other='???';

/*OthCVD ICD10 (5 characters*/

invalue $iOthCVD

'I00'-'I00XX', 'I010'-'I012X', 'I018'-'I020X', 'I029'-'I029X',

'I050'-'I052X', 'I058'-'I062X', 'I068'-'I072X', 'I078'-'I083X',

'I088'-'I092X', 'I098'-'I099X', 'I10'-'I10XX', 'I119'-'I120X',

'I129'-'I129X', 'I131'-'I131X', 'I139'-'I139X', 'I150'-'I152X',

'I158'-'I159X', 'I230'-'I236X', 'I238'-'I238X', 'I240'-'I241X',

'I250'-'I250X', 'I2510'-'I2513', 'I260'-'I260X', 'I268'-'I272X',

'I278'-'I281X', 'I288'-'I289X', 'I300'-'I301X', 'I308'-'I313X',

'I318'-'I321X', 'I328'-'I328X', 'I330'-'I330X', 'I339'-'I342X',

'I348'-'I352X', 'I358'-'I362X', 'I368'-'I372X', 'I378'-'I379X',

'I38'-'I38XX', 'I390'-'I394X', 'I398'-'I398X', 'I400'-'I401X',

'I408'-'I412X', 'I418'-'I418X', 'I423'-'I425X', 'I440'-'I447X',

'I450'-'I456X', 'I458'-'I461X', 'I469'-'I472X', 'I479'-'I479X',

'I48'-'I48XX', 'I490'-'I495X', 'I498'-'I499X', 'I510'-'I521X',

'I528'-'I528X', 'I700'-'I701X', 'I7020'-'I7024','I708'-'I709X',

'I7100'-'I7103','I711'-'I716X', 'I718'-'I724X', 'I728'-'I729X',

'I738'-'I739X', 'I790'-'I790X', 'I980'-'I981X' ='OCVD' /*OthCVD*/

other='???';

run;

**********************************************************************************************

************* 1. Produce the NZ estimated residential population for 3 Sep 2010 *************

**********************************************************************************************;

* To identify NZ residents at the time of the first earthquake in Christchurch 2010;

* This code was modified from the original posted by Nathaniel Matheson-Dunning on the IDI code sharing wiki, and the adjusted version by Sheree Gibbs;

* It has been modified for a 4 September 2010 reference date and to work without access to the IRD source tables;

%let year = 2010; ** Specify year of interest;

%let prevyear = %eval(&year. - 1);

%let startyrsql = '2010-09-04';

%let endyrsql = '2011-09-03';

*additional libraries required;

libname msd ODBC dsn=idi_clean_archive_srvprd schema=msd_clean;

libname ir ODBC dsn=idi_sandpit_srvprd schema=clean_read_ir_restrict; *IRD information available in the sandpit only;

libname acc ODBC dsn=idi_clean_archive_srvprd schema=acc_clean;

libname dia ODBC dsn=idi_clean_archive_srvprd schema=dia_clean;

libname moe ODBC dsn=idi_clean_archive_srvprd schema=moe_clean;

**************************************************************************************************************

*** 1.1 Produce a list of all individuals with activity in relevant datasets in last 12 months to 3Sep2010 ***

**************************************************************************************************************;

** Create a list of all people in the spine;

data spinepop;

set data.personal_detail (where=(snz_spine_ind=1) keep=snz_uid snz_spine_ind);

run;

**************************************************************************************

*** 1.1.1 Identify individuals with activity in education datasets in last year ***

**************************************************************************************;

** Tertiary enrolments;

proc sql;

connect to odbc(dsn="idi_clean_20160224_srvprd");

create table tertiary as

select * from connection to ODBC

(select distinct snz_uid, 1 as flag_ed

from moe_clean.enrolment

where (moe_enr_year_nbr = &year.)

order by snz_uid);

disconnect from ODBC;

quit;

** Industry training;

proc sql;

connect to odbc(dsn="idi_clean_20160224_srvprd");

create table industry_tr as

select * from connection to ODBC

(select distinct snz_uid, 1 as flag_ed

from moe_clean.tec_it_learner

where (moe_itl_year_nbr = &year.)

order by snz_uid);

disconnect from ODBC;

quit;

** School enrolments;

proc sql;

connect to odbc(dsn="idi_clean_20160224_srvprd");

create table school as

select * from connection to ODBC

(select distinct snz_uid, 1 as flag_ed, moe_esi_start_date

from moe_clean.student_enrol

where ((moe_esi_start_date<=&endyrsql.) and (moe_esi_end_date>=&startyrsql. or moe_esi_end_date is NULL))

order by snz_uid);

disconnect from ODBC;

quit;

*********************************************************************************

*** 1.1.2 Identify individuals with activity in tax datasets in last year *****

*********************************************************************************;

**This section has been modified from the Census Transformation code as we only have access to the tax summary table in the IRI restrict area;

*concordance table to match ird user id to snz id - this is done because the snz uid changes every refresh;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table inconcord as

select * from connection to odbc

(select snz_uid, snz_ird_uid

from [IDI_Clean].[security].[concordance]

where snz_ird_uid > 0

order by snz_ird_uid);

disconnect from odbc;

run;

*All individuals with taxable income in the summary table for current or previous calendar year;

proc sql;

connect to odbc(dsn="idi_sandpit_srvprd");

create table tax_sum1 as

select * from connection to ODBC

(select snz_ird_uid, ir_inc_year_nbr,

inc_cal_yr_mth_01_amt+inc_cal_yr_mth_02_amt+inc_cal_yr_mth_03_amt+inc_cal_yr_mth_04_amt+inc_cal_yr_mth_05_amt+inc_cal_yr_mth_06_amt+inc_cal_yr_mth_07_amt+inc_cal_yr_mth_08_amt as tot_first_eight,

inc_cal_yr_mth_09_amt+inc_cal_yr_mth_10_amt+inc_cal_yr_mth_11_amt+inc_cal_yr_mth_12_amt as tot_last_four

from clean_read_ir_restrict.ems_cal_yr_summary

where ir_inc_year_nbr=&year or ir_inc_year_nbr=&prevyear

order by snz_ird_uid);

disconnect from ODBC;

quit;

*merge to give latest refresh snz_uid for sandpit ird income data;

*Select only those individuals with income in the last eight months of prev year or the first eight months of this calendar year date;

data tax_sum;

merge tax_sum1 (in=a) inconcord;

by snz_ird_uid;

if a and (ir_inc_year_nbr=&year and tot_first_eight>0) or (ir_inc_year_nbr=&prevyear and tot_last_four>0);

if snz_uid=. then delete;

run;

proc sql;

create table tax as

select distinct snz_uid, 1 as flag_ir

from tax_sum;

quit;

**********************************************************************************************************

*********** 1.1.3 Identify individuals with activity in health datasets in the last year **************

**********************************************************************************************************;

** GMS claims;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table gms_activity as

select * from connection to ODBC

(select distinct snz_uid

from moh_clean.gms_claims

where moh_gms_visit_date >= &startyrsql and moh_gms_visit_date <= &endyrsql

order by snz_uid);

disconnect from odbc;

quit;

** Laboratory tests;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table lab_claims as

select * from connection to ODBC

(select distinct snz_uid

from moh_clean.lab_claims

where moh_lab_visit_date >= &startyrsql and moh_lab_visit_date <= &endyrsql

order by snz_uid);

disconnect from odbc;

quit;

** Non-admissions events;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table nnpac as

select * from connection to ODBC

(select distinct snz_uid

from moh_clean.nnpac

where moh_nnp_service_date >= &startyrsql and moh_nnp_service_date <= &endyrsql and moh_nnp_attendence_code = 'ATT'

order by snz_uid);

disconnect from odbc;

quit;

** Prescriptions dispensed;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table pharma as

select * from connection to ODBC

(select distinct snz_uid

from moh_clean.pharmaceutical

where moh_pha_dispensed_date >= &startyrsql and moh_pha_dispensed_date <= &endyrsql

order by snz_uid);

disconnect from odbc;

quit;

** Consultation with PHO-registered GP;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table pho as

select * from connection to ODBC

(select distinct snz_uid

from moh_clean.pho_enrolment

where moh_pho_last_consul_date >= &startyrsql and moh_pho_last_consul_date <= &endyrsql

order by snz_uid);

disconnect from odbc;

quit;

** Discharged from publically funded hospitals;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table hospital as

select * from connection to ODBC

(select distinct snz_uid

from moh_clean.pub_fund_hosp_discharges_event

where moh_evt_even_date >= &startyrsql and moh_evt_even_date <= &endyrsql

order by snz_uid);

disconnect from odbc;

quit;

** Combine all health activity datasets to get list of all people with health activity in last year;

data health_activity_dups;

set gms_activity lab_claims nnpac pharma pho hospital;

run;

proc sort; by snz_uid;

data health_activity;

set health_activity_dups;

by snz_uid;

if first.snz_uid;

flag_health=1;

run;

******************************************************************************

*** 1.1.4 Identify individuals with activity in ACC in the last year ******

******************************************************************************;

** ACC claims (date of file within the last year, not date of accident);

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table acc as

select * from connection to ODBC

(select distinct snz_uid, 1 as flag_acc

from acc_clean.claims

where acc_cla_lodgement_date >= &startyrsql and acc_cla_lodgement_date <= &endyrsql

order by snz_uid);

disconnect from odbc;

quit;

***************************************************************

*********** 1.2 Get births from the last 5 years ***********

***************************************************************;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table births as

select * from connection to ODBC

(select distinct snz_uid, 1 as flag_birth

from dia_clean.births

where (dia_bir_birth_year_nbr > %eval(&year. - 5) and dia_bir_birth_year_nbr <= &year.)

order by snz_uid);

disconnect from odbc;

quit;

*********************************************************************************

*** 1.3 Combine all activity files with spine to create a total population ***

*********************************************************************************;

** Combine all activity files;

data total_activity_pop;

merge tax industry_tr tertiary school health_activity acc births;

by snz_uid;

if flag_ed = . then flag_ed = 0;

if flag_ir = . then flag_ir = 0;

if flag_health = . then flag_health = 0;

if flag_acc = . then flag_acc = 0;

if flag_birth = . then flag_birth = 0;

activity_flag = 1;

drop moe_esi_start_date;

run;

proc sort data=spinepop;

by snz_uid;

run;

** Combine with spine;

data totalpop;

merge total_activity_pop (in=a) spinepop (in=b);

by snz_uid;

** Only include invididuals who had activity and are in spine ;

if a and b;

run;

********************************************************************

*** 1.4 Get age and sex from personal details file ***

*** Remove deceased individuals from the population ***

********************************************************************;

** Match with personal details file to pick up age, sex

** using hash object programming to reduce processing time and memory load;

data extra.finalpop2;

length snz_uid flag_ir flag_ed flag_health flag_acc flag_birth 8;

if _N_ = 1 then

do;

declare hash h(dataset:'totalpop');

h.defineKey('snz_uid');

h.defineData('flag_ir', 'flag_ed', 'flag_health', 'flag_acc', 'flag_birth');

h.defineDone();

end;

set data.personal_detail;

if h.find() = 0 then

output;

run;

** Calculate age at 31 August and remove deceased individuals (can we add in months to be more accurate? how much would this improve it?);

data finalpop_age2;

set extra.finalpop2;

if snz_birth_month_nbr <9 then age = &year. - snz_birth_year_nbr ;

else if snz_birth_month_nbr ge 9 then age = &prevyear. - snz_birth_year_nbr;

if age lt 0 then delete;

if snz_deceased_year_nbr < &year. and snz_deceased_year_nbr ne . then delete;

if snz_deceased_year_nbr = 2010 and snz_deceased_month_nbr <9 and snz_deceased_year_nbr ne . then delete;

rename snz_sex_code = sex

snz_ethnicity_grp1_nbr = snz_European

snz_ethnicity_grp2_nbr = snz_Maori

snz_ethnicity_grp3_nbr = snz_Pacific

snz_ethnicity_grp4_nbr = snz_Asian

snz_ethnicity_grp5_nbr = snz_MELAA

snz_ethnicity_grp6_nbr = snz_Other; *I think this is ever ethnicity from SNZ personal details file;

drop link_set_key snz_parent1_uid snz_parent2_uid snz_person_ind snz_spine_ind;

run;

** Get a list of all deaths recorded in health data;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table moh_deaths as

select * from connection to ODBC

(select distinct snz_uid, 1 as deceased_flag

from moh_clean.mortality

where (moh_mor_death_year_nbr < &year.) OR (moh_mor_death_year_nbr = &year. AND moh_mor_death_month_nbr < 9)

order by snz_uid);

disconnect from ODBC;

quit;

** Get a list of all deaths recorded in DIA data;

proc sql;

connect to odbc(dsn="idi_clean_archive_srvprd");

create table dia_deaths as

select * from connection to ODBC

(select distinct snz_uid, 1 as deceased_flag

from dia_clean.deaths

where (dia_dth_death_year_nbr < &year.) OR (dia_dth_death_year_nbr = &year. AND dia_dth_death_month_nbr < 9)

order by snz_uid);

quit;

** Merge with population file and remove deceased individuals from the population;

data finalpop_age;

merge finalpop_age2 moh_deaths dia_deaths;

by snz_uid;

if deceased_flag=1 then delete;

drop deceased_flag;

run;

*********************************************************************************

*** 1.5 Remove individuals from the population if they were living overseas ***

*********************************************************************************;

** Calculate amount of time spent overseas in last 12 months since 4th Sep 2010;

proc sql;

create table overseas_spells_1yr as

select unique snz_uid , pos_applied_date, pos_ceased_date, pos_day_span_nbr

from data.person_overseas_spell

where pos_applied_date < "4SEP&year.:00:00:00.000"dt and pos_ceased_date >= "4SEP&prevyear.:00:00:00.000"dt

order by snz_uid, pos_applied_date;

quit;

data overseas_time_1yr;

set overseas_spells_1yr;

if pos_ceased_date >= "4SEP&year.:00:00:00.000"dt and pos_applied_date < "4SEP&prevyear.:00:00:00.000"dt

then time_to_add = 365;

else if pos_ceased_date >= "4SEP&year.:00:00:00.000"dt

then time_to_add = ("4SEP&year.:00:00:00.000"dt - pos_applied_date) / 86400;