****************************************************************************

********* CHAPTER 3: APPLICATION 1: *********

********* BUILDING A DATABASE NEW APPROACH *********

****************************************************************************

* The application consists of building a gravity-type database with data from

* different sources. The goal is to apply the common basic Stata commands.

* Data source: WDI, CEPII, WTO, COMTRADE,

* Data used:tradeflows90-95.txt;

*tradeflows96-00.txt;

*tradeflows01-05.txt;

*joinwto.txt;

*GDP.csv;

*dist_cepii224.dta;

*Religion.dta;

* Data saved:gravity.dta

********* PRELIMINARY STEP *********

* Set memory and path

clear all

*set mem 800m

setmatsize 800

graph drop _all

set more off, perm

* Open database (set your folder\subfolder)

cd "D:\usuarios\Ramon\Desktop\GRAVITY\Practical guide to TPA\Chapter3\Datasets"

**** CREATE THE MAIN DATABASE WITH GRAVITY COVARIATES *****

* Step 1: Import data into STATA.

* Import trade flows data

insheet using "tradeflows.csv", clear delimiter(";") names

labelvar importer "reporter"

labelvar exporter "partner"

labelvar imports "Imports value in thousand"

save "tradeflows.dta", replace

* Import WTO accession data

insheet using "joinwto.txt", clear tab names

labelvar join "GATT/WTO accession date"

replace country = "BLX" if country == "BEL" | country == "LUX"

replace country="ZAR" if country=="COD"

duplicates drop

sort country

save "joinwto.dta", replace

* Import GDP data and correct for BLX = BEL + LUX

insheet using "GDP.csv", clear comma names

* Adjust for BLX

replacecountrycode = "BLX" if countrycode == "BEL" | countrycode == "LUX"

replacecountryname = "BENELUX" if countryname == "Belgium" | countryname == "Luxembourg"

foreach v of varlist v* {

local x : variable label `v'

rename `v' yr`x'

* Destring GDP value: NaN == .

destringyr`x', replace ignore("NaN")

* Correct for BLX

byscountrycode: egengdp`x' = total(yr`x') if countrycode == "BLX"

replaceyr`x' = gdp`x' if countrycode == "BLX"

dropgdp`x'

}

duplicates drop

save "GDP.dta", replace

* Be sure that every file (especially those temporary files) is saved in the same place

* (default directory, for example)

* Open gravity variables and correct for BLX = BEL + LUX

use "dist_cepii224.dta", clear

rename country exporter

rename partner importer

renamerepnumexporternum

renamepartnumimporternum

replace exporter= "BLX" if exporter== "BEL" | exporter== "LUX"

replace importer= "BLX" if importer== "BEL" | importer== "LUX"

collapse(mean)exporternumimporternumcontigcomlang_off colony distREPlandlockedPARTlandlocked , by(exporter importer)

drop if exporter == importer

labelvarexporternum "IFS code exporter"

labelvarimporternum "IFS code importer"

labelvarcontig "1 for contiguity"

labelvarcomlang_off "1 for common official language"

labelvar colony "1 for pairs ever in colonial relationship"

labelvardist "simple distance"

labelvarREPlandlocked "1 if exporter landlocked"

labelvarPARTlandlocked "1 if importer landlocked"

sort exporter importer

save "CEPII.dta", replace

* Step 2: Create all possible country-pairs-year combinations

use "tradeflows.dta", clear

fillin importer exporter year

replace imports = 0 if imports == .

drop if importer == exporter

drop _fillin

save "gravity_temp1.dta", replace

* Step 3: Reshape and Merge country-specific data with bilateral trade flows

* Reshape the gdp data

use "GDP.dta", clear

keepcountrycodeyr*

reshape long yr, i(countrycode) j(year)

renameyrgdp

labelvargdp "GDP in current USD"

save "GDP_new.dta", replace

* Create the country-specific gdp data

use "GDP_new.dta", clear

rename country exporter

renamegdpgdp_exporter

save "GDP_exporter.dta", replace

use "GDP_new.dta", clear

rename country importer

renamegdpgdp_importer

save "GDP_importer.dta", replace

* Merge the country-specific data with bilateral trade

use "gravity_temp1.dta", clear

sort exporter year

merge exporter year using "GDP_exporter.dta"

keep if _merge == 3

drop _merge

sort importer year

merge importer year using "GDP_importer.dta"

keep if _merge == 3

drop _merge

sort exporter importer year

save "gravity_temp2.dta", replace

* Do the same for the WTO accession data, but when mergin do not

* drop the observations where there was no match(keep if _merge == 3),

* becauseit means that the country is not a member.

use "joinWTO.dta", clear

rename country exporter

rename join join_exporter

save "joinWTO_exporter.dta", replace

use "joinWTO.dta", clear

rename country importer

rename join join_importer

save "joinWTO_importer.dta", replace

use "gravity_temp2.dta", clear

sort exporter year

merge exporter using "joinWTO_exporter.dta"

drop _merge

sort importer year

merge importer using "joinWTO_importer.dta"

drop _merge

sort exporter importer year

save "gravity_temp3.dta", replace

* Step 4: Merge with pair-specific data (CEPII Gravity data)

use "gravity_temp3.dta", clear

sort exporter importer year

merge exporter importer using "CEPII.dta"

keep if _merge == 3

drop _merge

sort exporter importer year

merge exporter importer using "Religion.dta"

keep if _merge == 1 | _merge == 3

drop _merge

replace religion = 0 if religion == .

sort exporter importer year

save "gravity_temp4.dta", replace

* Step 5: Generate new country-pair variables

use "gravity_temp4.dta", clear

* note the substitution below of missing data with the random number 9999 is functional to building the WTO-membership variables

replacejoin_importer = 9999 if join_importer == .

replacejoin_exporter = 9999 if join_exporter == .

forvar in any oneinbothinnonein: gen var = 0

replaceonein = 1 if (join_exporter <= year & join_importer > year) | (join_importer <= year & join_exporter > year)

labelvaronein "one of the country pair is member of the WTO"

replacebothin = 1 if (join_exporter <= year & join_importer <= year)

labelvarbothin "both countries is member of the WTO"

replacenonein = 1 if (join_exporter > year & join_importer > year)

labelvaronein "none of the country pair is member of the WTO"

replacejoin_importer = . ifjoin_importer == 9999

replacejoin_exporter = . ifjoin_exporter == 9999

* OECD var:

genoecd_ex=0

replace oecd_ex= 1 if inlist(exporter, "AUT", "AUS", "BEN", "CAN","CHL","CZE","DNK","EST","FIN")

replace oecd_ex= 1 if inlist(exporter, "FRA","DEU","GRC","HUN","ISL","IRL","ISR","ITA","JPN")

replace oecd_ex= 1 if inlist(exporter, ”KOR”,”LUX”,”MEX”,”NLD”,”NZL”,”NOR”,”POL”,”PRT”,”SVK”)

replace oecd_ex= 1 if inlist(exporter, ”SVN”,”ESP”,”SWE”,”CHE”,”TUR”,”GBR”,”USA”)

labelvaroecd_ex"origin in OECD country"

genoecd_im=0

replace oecd_im= 1 if inlist(importer, "AUT", "AUS", "BEN", "CAN","CHL","CZE","DNK","EST","FIN")

replace oecd_im= 1 if inlist(importer, "FRA","DEU","GRC","HUN","ISL","IRL","ISR","ITA","JPN")

replace oecd_im= 1 if inlist(importer, ”KOR”,”LUX”,”MEX”,”NLD”,”NZL”,”NOR”,”POL”,”PRT”,”SVK”)

replace oecd_im= 1 if inlist(importer, ”SVN”,”ESP”,”SWE”,”CHE”,”TUR”,”GBR”,”USA”)

labelvaroecd_im"destination to OECD country"

savegravity.dta, replace

save "D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS\gravity.dta", replace

foreach file in gravity_temp1.dta gravity_temp2.dta gravity_temp3.dta gravity_temp4.dta{

erase `file'

}

* Step 6 : Generate dummies

use "gravity.dta", clear

* Compute the country imports and exports dummies

quietly tab exporter, gen(exporter_)

quietly tab importer, gen(importer_)

* Compute time dummies

quietly tab year, gen(year_)

* Compute country-time dummies

* If you possess STATA/IC, you will not be able to increase the

* number of variables to create all the dummies. To adresse this

* issue, there are different possibilities.

/*

* 1) Reduce the number of years considered

* 2) Compute country-period dummies

* 3) Make the panel balanced

* keep importer exporter year year_* imports gdp_exportergdp_importerjoin_exporter

* join_importercontigcomlang_off colony distoneinbothinnonein

* Default option having full memory

*egenexporteryear = group(exporter year)

*quietly tab exporteryear, gen(exportertime_)

*egenimporteryear = group(importer year)

*quietly tab importeryear, gen(importertime_)

* 1) Reduce the number of years considered to compute

* country-time dummies (1995-2005) or (2000-2005)

* this is the option we will choose for the exercise

keep if year > 1995

egenexporteryear = group(exporter year)

quietly tab exporteryear, gen(exportertime_)

egenimporteryear = group(importer year)

quietly tab importeryear, gen(importertime_)

dropimporteryearexporteryear

* keep importer exporter year year_* imports gdp_exportergdp_importerjoin_exporter

* join_importercontigcomlang_off colony distoneinbothinnonein

* 2) Compute country-period dummies. You can change the number

* of years for a period, by changing the value of the local

* variable step (step = 2).

*gen time = 0

*egenminyr = min(year)

*egenmaxyr = max(year)

*levelsofminyr, local(miny)

*levelsofmaxyr, local(maxy)

*local step = 3

*forvaluesi = `miny'(`step')`maxy' {

*forvalues j = 0(1)`step' {

*replace time = (`i' - `miny')/`step' if year == (`i' + `j')

*}

*}

*egenexportertime = group(exporter time)

*quietly tab exportertime, gen(exportertime_)

*

*egenimportertime = group(importer time)

*quietly tab importertime, gen(importertime_)

*

*drop minyrmaxyr

* 3) Make the panel balanced to compute country-time

* dummies. Note that you can download and use the function

* xtbalance to make the panel balanced. More generally,

* there is a trade-off between the number of observations

* and the number of period covered.

* egen miss = rowmiss(imports gdp_importergdp_exporter)

* keep if miss == 0

* bys importer exporter: egennrobs = count(imports)

* levelsofminyr, local(miny)

* levelsofmaxyr, local(maxy)

* drop if nrobs < `maxy' - `miny' + 1

* egenexporteryear = group(exporter year)

* quietly tab exporteryear, gen(exportertime_)

* egenimporteryear = group(importer year)

* quietly tab importeryear, gen(importertime_)

*/

* Compute country pair dummies

egenpairid = group(importer exporter)

quietly tab pairid, gen(pair_)

* for using xtbalancefinditxtbalance and download the function

* declare the panel structure

* egenpairid = group(importer exporter)

* tssetpairid year

* use xtbalance with the option “range”

* xtbalance, range (2000 2005)

* Step 7: Data tansformation

* Compute the log of the variables imports, GDPs and distance

genlimports = log(imports)

labelvarlimport "Log of imports value"

genlgdp_exporter = log(gdp_exporter)

labelvarlgdp_exporter "log of exporter's GDP"

genlgdp_importer = log(gdp_importer)

labelvarlgdp_importer "log of importer's GDP"

genldist = log(dist)

labelvarldist "log of distance"

* Compute the 5 years average of the variables

* gen period = 1

* replace period = 2 if year >= 1995 & year < 2000

* replace period = 3 if year >= 2000

* collapse(mean) limportlgdp_exporterlgdp_importerldist noneinbothinoneincontigcomlang_off colony distREPlandlockedPARTlandlocked year_* exporter_* importer_* , by(period)

save "D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS\gravity_1996_2005.dta", replace

keepif inrange(year, 2000, 2005) & oecd_ex==1 oecd_im==1

dropexporter_* importer_*exportertime* importertime*year_*

save "D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS\gravity_OECD_2000_2005.dta", replace

*** ECONOMETRIC ESTIMATIONS ***

* REG1: Basic cross section estimation for 2005 of equation

clear all

cd "D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS"

use"D:\usuarios\Ramon\Desktop\GRAVITY\PROPIOS\gravity_OECD_2000_2005.dta",clear

quietly tab exporter, gen(exporter_)

quietly tab importer, gen(importer_)

setmatsize 10000

egenexporteryear = group(exporter year)

quietly tab exporteryear, gen(exportertime_)

egenimporteryear = group(importer year)

quietly tab importeryear, gen(importertime_)

* inspect limports if year== 2005

reglimportsldistlgdp* if year==2005

* comparing coefficients between years 2000 and 2005

reglimportsldistlgdp* if year==2000

est store year2000

reglimportsldistlgdp* if year==2005

est store year2005

suest year2000 year2005

test [year2000_mean]ldist = [year2005_mean]ldist

* REG2: Cross section estimation for 2005 of equation adding the rest of explanatory (not dummies)

reglimports ldistlgdp* if year==2005, robust

eststo est1

reglimports contig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp* if year==2005, robust

eststo est2

esttab, r2 ar2 se scalar(rmse)

* REG3:

* *3.1 For cross-section 2005 with country dummies

reglimportsi.exporternumi.importernumcontig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*if year==2005, robust

** 3.2 For the period 2000-2005 with country dummies

reglimportsi.yeari.exporternumi.importernumcontig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust

**3.3 Controlling for time-variant MTR (2000-2005)

reglimportsi.yeari.exporternumi.importernumexportertime_* importertime_*contig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust

**3.4 Controlling for “pairid” fixed effects (2000-2005)

reglimportsi.yeari.exporternumi.importernumexportertime_* importertime_* i.pairidcontigcomlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust

* Comparative table of CROSS-SECTION OR POOLED ESPECIFICATIONS WITH AND WITHOUT MTR CONTROL

eststo clear

eststo: quietly reglimportsi.exporternumi.importernumcontig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp* if year==2005, robust

eststo: quietly reglimportsi.yeari.exporternumi.importernumcontig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust

eststo: quietly reglimportsi.yeari.exporternumi.importernumexportertime_* importertime_* contig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust

eststo: quietly reglimportsi.yeari.exporternumi.importernumexportertime_* importertime_* i.pairidcontigcomlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, robust

esttab, r2 ar2 se keep (*year contigcomlang_off colony religion ldistlgdp*) noomittedmtitles (NO_Dum ij_dum_2005 ij_dum_00_05 ijxt_dum_00_05) nogaps

* REG4=* Step 8: Identify the Panel and run regressions

* Specify the panel structure

xtsetpairid year

eststo: xtreglimportsi.yearimportertime_* exportertime_* contig comlang_off onein colony REPlandlockedPARTlandlocked religion ldistlgdp*, fe robust

esttab, r2 ar2 se keep (*year contigcomlang_off colony religion ldistlgdp*) noomittedmtitles (NO_Dum ij_dum_2005 ij_dum_00_05 ijxt_dum_00_05 panel) nogaps