****************************************************************************
********* CHAPTER 3: APPLICATION 1: *********
********* BUILDING A DATABASE NEW APPROACH *********
****************************************************************************
* The application consists of building a gravity-type database with data from
* different sources. The goal is to apply the common basic Stata commands.
* Data source: WDI, CEPII, WTO, COMTRADE,
* Data used:tradeflows90-95.txt;
*tradeflows96-00.txt;
*tradeflows01-05.txt;
*joinwto.txt;
*GDP.csv;
*dist_cepii224.dta;
*Religion.dta;
* Data saved:gravity.dta
********* PRELIMINARY STEP *********
* Set memory and path
clear all
*set mem 800m
setmatsize 800
graph drop _all
set more off, perm
* Open database (set your folder\subfolder)
cd "D:\usuarios\Ramon\Desktop\GRAVITY\Practical guide to TPA\Chapter3\Datasets"
**** CREATE THE MAIN DATABASE WITH GRAVITY COVARIATES *****
* Step 1: Import data into STATA.
* Import trade flows data
insheet using "tradeflows.csv", clear delimiter(";") names
labelvar importer "reporter"
labelvar exporter "partner"
labelvar imports "Imports value in thousand"
save "tradeflows.dta", replace
* Import WTO accession data
insheet using "joinwto.txt", clear tab names
labelvar join "GATT/WTO accession date"
replace country = "BLX" if country == "BEL" | country == "LUX"
replace country="ZAR" if country=="COD"
duplicates drop
sort country
save "joinwto.dta", replace
* Import GDP data and correct for BLX = BEL + LUX
insheet using "GDP.csv", clear comma names
* Adjust for BLX
replacecountrycode = "BLX" if countrycode == "BEL" | countrycode == "LUX"
replacecountryname = "BENELUX" if countryname == "Belgium" | countryname == "Luxembourg"
foreach v of varlist v* {
local x : variable label `v'
rename `v' yr`x'
* Destring GDP value: NaN == .
destringyr`x', replace ignore("NaN")
* Correct for BLX
byscountrycode: egengdp`x' = total(yr`x') if countrycode == "BLX"
replaceyr`x' = gdp`x' if countrycode == "BLX"
dropgdp`x'
}
duplicates drop
save "GDP.dta", replace
* Be sure that every file (especially those temporary files) is saved in the same place
* (default directory, for example)
* Open gravity variables and correct for BLX = BEL + LUX
use "dist_cepii224.dta", clear
rename country exporter
rename partner importer
renamerepnumexporternum
renamepartnumimporternum
replace exporter= "BLX" if exporter== "BEL" | exporter== "LUX"
replace importer= "BLX" if importer== "BEL" | importer== "LUX"
collapse(mean)exporternumimporternumcontigcomlang_off colony distREPlandlockedPARTlandlocked , by(exporter importer)
drop if exporter == importer
labelvarexporternum "IFS code exporter"
labelvarimporternum "IFS code importer"
labelvarcontig "1 for contiguity"
labelvarcomlang_off "1 for common official language"
labelvar colony "1 for pairs ever in colonial relationship"
labelvardist "simple distance"
labelvarREPlandlocked "1 if exporter landlocked"
labelvarPARTlandlocked "1 if importer landlocked"
sort exporter importer
save "CEPII.dta", replace
* Step 2: Create all possible country-pairs-year combinations
use "tradeflows.dta", clear
fillin importer exporter year
replace imports = 0 if imports == .
drop if importer == exporter
drop _fillin
save "gravity_temp1.dta", replace
* Step 3: Reshape and Merge country-specific data with bilateral trade flows
* Reshape the gdp data
use "GDP.dta", clear
keepcountrycodeyr*
reshape long yr, i(countrycode) j(year)
renameyrgdp
labelvargdp "GDP in current USD"
save "GDP_new.dta", replace
* Create the country-specific gdp data
use "GDP_new.dta", clear
rename country exporter
renamegdpgdp_exporter
save "GDP_exporter.dta", replace
use "GDP_new.dta", clear
rename country importer
renamegdpgdp_importer
save "GDP_importer.dta", replace
* Merge the country-specific data with bilateral trade
use "gravity_temp1.dta", clear
sort exporter year
merge exporter year using "GDP_exporter.dta"
keep if _merge == 3
drop _merge
sort importer year
merge importer year using "GDP_importer.dta"
keep if _merge == 3
drop _merge
sort exporter importer year
save "gravity_temp2.dta", replace
* Do the same for the WTO accession data, but when mergin do not
* drop the observations where there was no match(keep if _merge == 3),
* becauseit means that the country is not a member.
use "joinWTO.dta", clear
rename country exporter
rename join join_exporter
save "joinWTO_exporter.dta", replace
use "joinWTO.dta", clear
rename country importer
rename join join_importer
save "joinWTO_importer.dta", replace
use "gravity_temp2.dta", clear
sort exporter year
merge exporter using "joinWTO_exporter.dta"
drop _merge
sort importer year
merge importer using "joinWTO_importer.dta"
drop _merge
sort exporter importer year
save "gravity_temp3.dta", replace
* Step 4: Merge with pair-specific data (CEPII Gravity data)
use "gravity_temp3.dta", clear
sort exporter importer year
merge exporter importer using "CEPII.dta"
keep if _merge == 3
drop _merge
sort exporter importer year
merge exporter importer using "Religion.dta"
keep if _merge == 1 | _merge == 3
drop _merge
replace religion = 0 if religion == .
sort exporter importer year
save "gravity_temp4.dta", replace
* Step 5: Generate new country-pair variables
use "gravity_temp4.dta", clear
* note the substitution below of missing data with the random number 9999 is functional to building the WTO-membership variables
replacejoin_importer = 9999 if join_importer == .
replacejoin_exporter = 9999 if join_exporter == .
forvar in any oneinbothinnonein: gen var = 0
replaceonein = 1 if (join_exporter <= year & join_importer > year) | (join_importer <= year & join_exporter > year)
labelvaronein "one of the country pair is member of the WTO"
replacebothin = 1 if (join_exporter <= year & join_importer <= year)
labelvarbothin "both countries is member of the WTO"
replacenonein = 1 if (join_exporter > year & join_importer > year)
labelvaronein "none of the country pair is member of the WTO"
replacejoin_importer = . ifjoin_importer == 9999
replacejoin_exporter = . ifjoin_exporter == 9999
* OECD var:
genoecd_ex=0
replace oecd_ex= 1 if inlist(exporter, "AUT", "AUS", "BEN", "CAN","CHL","CZE","DNK","EST","FIN")
replace oecd_ex= 1 if inlist(exporter, "FRA","DEU","GRC","HUN","ISL","IRL","ISR","ITA","JPN")
replace oecd_ex= 1 if inlist(exporter, ”KOR”,”LUX”,”MEX”,”NLD”,”NZL”,”NOR”,”POL”,”PRT”,”SVK”)
replace oecd_ex= 1 if inlist(exporter, ”SVN”,”ESP”,”SWE”,”CHE”,”TUR”,”GBR”,”USA”)
labelvaroecd_ex"origin in OECD country"
genoecd_im=0
replace oecd_im= 1 if inlist(importer, "AUT", "AUS", "BEN", "CAN","CHL","CZE","DNK","EST","FIN")
replace oecd_im= 1 if inlist(importer, "FRA","DEU","GRC","HUN","ISL","IRL","ISR","ITA","JPN")
replace oecd_im= 1 if inlist(importer, ”KOR”,”LUX”,”MEX”,”NLD”,”NZL”,”NOR”,”POL”,”PRT”,”SVK”)
replace oecd_im= 1 if inlist(importer, ”SVN”,”ESP”,”SWE”,”CHE”,”TUR”,”GBR”,”USA”)
labelvaroecd_im"destination to OECD country"
savegravity.dta, replace
save "D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS\gravity.dta", replace
foreach file in gravity_temp1.dta gravity_temp2.dta gravity_temp3.dta gravity_temp4.dta{
erase `file'
}
* Step 6 : Generate dummies
use "gravity.dta", clear
* Compute the country imports and exports dummies
quietly tab exporter, gen(exporter_)
quietly tab importer, gen(importer_)
* Compute time dummies
quietly tab year, gen(year_)
* Compute country-time dummies
* If you possess STATA/IC, you will not be able to increase the
* number of variables to create all the dummies. To adresse this
* issue, there are different possibilities.
/*
* 1) Reduce the number of years considered
* 2) Compute country-period dummies
* 3) Make the panel balanced
* keep importer exporter year year_* imports gdp_exportergdp_importerjoin_exporter
* join_importercontigcomlang_off colony distoneinbothinnonein
* Default option having full memory
*egenexporteryear = group(exporter year)
*quietly tab exporteryear, gen(exportertime_)
*egenimporteryear = group(importer year)
*quietly tab importeryear, gen(importertime_)
* 1) Reduce the number of years considered to compute
* country-time dummies (1995-2005) or (2000-2005)
* this is the option we will choose for the exercise
keep if year > 1995
egenexporteryear = group(exporter year)
quietly tab exporteryear, gen(exportertime_)
egenimporteryear = group(importer year)
quietly tab importeryear, gen(importertime_)
dropimporteryearexporteryear
* keep importer exporter year year_* imports gdp_exportergdp_importerjoin_exporter
* join_importercontigcomlang_off colony distoneinbothinnonein
* 2) Compute country-period dummies. You can change the number
* of years for a period, by changing the value of the local
* variable step (step = 2).
*gen time = 0
*egenminyr = min(year)
*egenmaxyr = max(year)
*levelsofminyr, local(miny)
*levelsofmaxyr, local(maxy)
*local step = 3
*forvaluesi = `miny'(`step')`maxy' {
*forvalues j = 0(1)`step' {
*replace time = (`i' - `miny')/`step' if year == (`i' + `j')
*}
*}
*egenexportertime = group(exporter time)
*quietly tab exportertime, gen(exportertime_)
*
*egenimportertime = group(importer time)
*quietly tab importertime, gen(importertime_)
*
*drop minyrmaxyr
* 3) Make the panel balanced to compute country-time
* dummies. Note that you can download and use the function
* xtbalance to make the panel balanced. More generally,
* there is a trade-off between the number of observations
* and the number of period covered.
* egen miss = rowmiss(imports gdp_importergdp_exporter)
* keep if miss == 0
* bys importer exporter: egennrobs = count(imports)
* levelsofminyr, local(miny)
* levelsofmaxyr, local(maxy)
* drop if nrobs < `maxy' - `miny' + 1
* egenexporteryear = group(exporter year)
* quietly tab exporteryear, gen(exportertime_)
* egenimporteryear = group(importer year)
* quietly tab importeryear, gen(importertime_)
*/
* Compute country pair dummies
egenpairid = group(importer exporter)
quietly tab pairid, gen(pair_)
* for using xtbalancefinditxtbalance and download the function
* declare the panel structure
* egenpairid = group(importer exporter)
* tssetpairid year
* use xtbalance with the option “range”
* xtbalance, range (2000 2005)
* Step 7: Data tansformation
* Compute the log of the variables imports, GDPs and distance
genlimports = log(imports)
labelvarlimport "Log of imports value"
genlgdp_exporter = log(gdp_exporter)
labelvarlgdp_exporter "log of exporter's GDP"
genlgdp_importer = log(gdp_importer)
labelvarlgdp_importer "log of importer's GDP"
genldist = log(dist)
labelvarldist "log of distance"
* Compute the 5 years average of the variables
* gen period = 1
* replace period = 2 if year >= 1995 & year < 2000
* replace period = 3 if year >= 2000
* collapse(mean) limportlgdp_exporterlgdp_importerldist noneinbothinoneincontigcomlang_off colony distREPlandlockedPARTlandlocked year_* exporter_* importer_* , by(period)
save "D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS\gravity_1996_2005.dta", replace
keepif inrange(year, 2000, 2005) & oecd_ex==1 oecd_im==1
dropexporter_* importer_*exportertime* importertime*year_*
save "D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS\gravity_OECD_2000_2005.dta", replace
*** ECONOMETRIC ESTIMATIONS ***
* REG1: Basic cross section estimation for 2005 of equation
clear all
cd "D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS"
use"D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS\gravity_OECD_2000_2005.dta",clear
quietly tab exporter, gen(exporter_)
quietly tab importer, gen(importer_)
setmatsize 10000
egenexporteryear = group(exporter year)
quietly tab exporteryear, gen(exportertime_)
egenimporteryear = group(importer year)
quietly tab importeryear, gen(importertime_)
* inspect limports if year== 2005
reglimportsldistlgdp* if year==2005
* comparing coefficients between years 2000 and 2005
reglimportsldistlgdp* if year==2000
est store year2000
reglimportsldistlgdp* if year==2005
est store year2005
suest year2000 year2005
test [year2000_mean]ldist = [year2005_mean]ldist
* REG2: Cross section estimation for 2005 of equation adding the rest of explanatory (not dummies)
reglimports ldistlgdp* if year==2005, robust
eststo est1
reglimports contig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp* if year==2005, robust
eststo est2
esttab, r2 ar2 se scalar(rmse)
* REG3:
* *3.1 For cross-section 2005 with country dummies
reglimportsi.exporternumi.importernumcontig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*if year==2005, robust
** 3.2 For the period 2000-2005 with country dummies
reglimportsi.yeari.exporternumi.importernumcontig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust
**3.3 Controlling for time-variant MTR (2000-2005)
reglimportsi.yeari.exporternumi.importernumexportertime_* importertime_*contig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust
**3.4 Controlling for “pairid” fixed effects (2000-2005)
reglimportsi.yeari.exporternumi.importernumexportertime_* importertime_* i.pairidcontigcomlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust
* Comparative table of CROSS-SECTION OR POOLED ESPECIFICATIONS WITH AND WITHOUT MTR CONTROL
eststo clear
eststo: quietly reglimportsi.exporternumi.importernumcontig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp* if year==2005, robust
eststo: quietly reglimportsi.yeari.exporternumi.importernumcontig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust
eststo: quietly reglimportsi.yeari.exporternumi.importernumexportertime_* importertime_* contig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust
eststo: quietly reglimportsi.yeari.exporternumi.importernumexportertime_* importertime_* i.pairidcontigcomlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust
esttab, r2 ar2 se keep (*year contigcomlang_off colony religion ldistlgdp*) noomittedmtitles (NO_Dum ij_dum_2005 ij_dum_00_05 ijxt_dum_00_05) nogaps
* REG4=* Step 8: Identify the Panel and run regressions
* Specify the panel structure
xtsetpairid year
eststo: xtreglimportsi.yearimportertime_* exportertime_* contig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, fe robust
esttab, r2 ar2 se keep (*year contigcomlang_off colony religion ldistlgdp*) noomittedmtitles (NO_Dum ij_dum_2005 ij_dum_00_05 ijxt_dum_00_05 panel) nogaps