import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
from pathlib import Path

sys.path.append("scripts/")
from scripts import generate_eda as eda

%matplotlib inline

birthrate = pd.read_csv("cleaned-data/1_birth rate.csv")
birthrate.head()

birthrate.describe()

birthrate_long = pd.melt(birthrate, id_vars="Region", var_name="Year",
                      value_name="birth_rate")
birthrate_long.sort_values(by="Year", inplace=True)

birthrate_long.head()

configs = {
    "FONT_SIZE": 10, 
    "FIGSIZE": (20, 20)
}
plot_title = "Birth rate by regions"
exceptions = {
    "Jilin": "red",
    "Shandong": "blue"
}

fig = eda.line_plots(birthrate, "birth_rate", plot_title, exceptions, configs, show=False)
fig.axes[0].set_ylim(top=24)

(4.7335, 24.0)

birthrate_transpose = birthrate.T
birthrate_transpose.columns = birthrate_transpose.iloc[0]
birthrate_transpose = birthrate_transpose[1:].astype(float)
birthrate_corr = birthrate_transpose.corr(numeric_only=True) # correlation matrix
birthrate_corr.head()

plt.figure(figsize=(10, 10))
birthrate_corr_colors = (birthrate_corr - birthrate_corr.min().min() )
birthrate_corr_colors = birthrate_corr_colors * (1/birthrate_corr_colors.max().max())
ax = sns.heatmap(birthrate_corr_colors, cmap='viridis',
            fmt="d", linewidths=0.5, center=0)
plt.title('Correlation matrix of birth rate of regions')
plt.xlabel('Year')
plt.ylabel('Region')
ax.tick_params(axis='x', labelrotation=90)
plt.show()

NGDP = pd.read_csv("cleaned-data/3_NGDP.csv")
NGDP.head()

population = pd.read_csv("cleaned-data/2_population.csv")
population.head()

merged = pd.merge(NGDP, population, on='Region')

merged['2019'] = merged['2019_x'] / merged['2019_y']
merged['2018'] = merged['2018_x'] / merged['2018_y']
merged['2017'] = merged['2019_x'] / merged['2019_y']
merged['2016'] = merged['2016_x'] / merged['2016_y']
merged['2015'] = merged['2015_x'] / merged['2015_y']
merged['2014'] = merged['2014_x'] / merged['2014_y']
merged['2013'] = merged['2013_x'] / merged['2013_y']
merged['2012'] = merged['2012_x'] / merged['2012_y']
merged['2011'] = merged['2011_x'] / merged['2011_y']
merged['2010'] = merged['2010_x'] / merged['2010_y']
merged['2009'] = merged['2009_x'] / merged['2009_y']
merged['2008'] = merged['2008_x'] / merged['2008_y']
merged['2007'] = merged['2007_x'] / merged['2007_y']
merged['2006'] = merged['2006_x'] / merged['2006_y']

ngdp_per_capita = merged.iloc[:, [0,29,30,31,32,33,34,35,36,37,38,39,40,41,42]]
ngdp_per_capita.head()

ngdp_per_capita.describe()

ngdp_per_capita_long = pd.melt(ngdp_per_capita, id_vars="Region", var_name="Year",
                      value_name="ngdp_per_capita")
ngdp_per_capita_long.sort_values(by="Year", inplace=True)

ngdp_per_capita_long.head()

from scripts import generate_eda as eda

configs = {
    "FONT_SIZE": 15, 
    "FIGSIZE": (20, 20)
}

plot_title = "NGDP per capita by region"

exceptions = {
    "Beijing": "red",
    "Shanghai": "blue",
    "Zhejiang": "green",
    "Jiangsu": "orange",
    "Guizhou": "purple",
    "Gansu": "black"
}

fig = eda.line_plots(ngdp_per_capita, plot_title,"ngdp_per_capita", exceptions, configs)

ngdp_per_capita_transpose = ngdp_per_capita.T
ngdp_per_capita_transpose.columns = ngdp_per_capita_transpose.iloc[0]
ngdp_per_capita_transpose = ngdp_per_capita_transpose[1:].astype(float)
ngdp_per_capita_corr = ngdp_per_capita_transpose.corr(numeric_only=True) # correlation matrix
ngdp_per_capita_corr.head()

plt.figure(figsize=(10, 10))
ngdp_per_capita_corr_colors = (ngdp_per_capita_corr - ngdp_per_capita_corr.min().min() )
ngdp_per_capita_corr_colors = ngdp_per_capita_corr_colors * (1/ngdp_per_capita_corr_colors.max().max())
ax = sns.heatmap(ngdp_per_capita_corr_colors, cmap='viridis',
            fmt="d", linewidths=0.5, center=0)
plt.title('Correlation matrix of Nominal GDP per capita of regions')
plt.xlabel('Year')
plt.ylabel('Region')
ax.tick_params(axis='x', labelrotation=90)
plt.show()

wage = pd.read_csv("cleaned-data/4_average wage.csv")
wage.head()

wage.describe()

wage_long = pd.melt(wage, id_vars="Region", var_name="Year",
                      value_name="wage")
wage_long.sort_values(by="Year", inplace=True)

wage_long.head()

from scripts import generate_eda as eda

configs = {
    "FONT_SIZE": 15, 
    "FIGSIZE": (20, 20)
}
plot_title = "Wages by region"
exceptions = {
    "Beijing": "red",
    "Shanghai": "blue",
    "Tianjin": "green",
    "Jiangsu": "orange",
}
fig = eda.line_plots(wage, "wage", plot_title, exceptions, configs)

wage_transpose = wage.T
wage_transpose.columns = wage_transpose.iloc[0]
wage_transpose = wage_transpose[1:].astype(float)
wage_corr = wage_transpose.corr(numeric_only=True) # correlation matrix
wage_corr.head()

plt.figure(figsize=(10, 10))
wage_corr_colors = (wage_corr - wage_corr.min().min() )
wage_corr_colors = wage_corr_colors * (1/wage_corr_colors.max().max())
ax = sns.heatmap(wage_corr_colors, cmap='viridis',
            fmt="d", linewidths=0.5, center=0)
plt.title('Correlation matrix of average wage for each province')
plt.xlabel('Year')
plt.ylabel('Region')
ax.tick_params(axis='x', labelrotation=90)
plt.show()

income = pd.read_csv("cleaned-data/5_disposable income per capita .csv")
income.head()

income.describe()

income_long = pd.melt(income, id_vars="Region", var_name="Year",
                      value_name="disposable_income")
income_long.sort_values(by="Year", inplace=True)

income_long.head()

from scripts import generate_eda as eda

configs = {
    "FONT_SIZE": 15, 
    "FIGSIZE": (20, 20)
}
plot_title = "Disposable income by region"
exceptions = {
    "Beijing": "red",
    "Shanghai": "blue",
    "Zhejiang": "green",
    "Guizhou": "purple",
    "Gansu": "black",
    "Tibet Autonomous Region": "brown"
}
fig = eda.line_plots(income, "disposable_income", plot_title, exceptions,  configs)

income_transpose = income.T
income_transpose.columns = income_transpose.iloc[0]
income_transpose = income_transpose[1:].astype(float)
income_corr = income_transpose.corr(numeric_only=True) # correlation matrix
income_corr.head()

plt.figure(figsize=(10, 10))
income_corr_colors = (income_corr - income_corr.min().min() )
income_corr_colors = income_corr_colors * (1/income_corr_colors.max().max())
ax = sns.heatmap(income_corr_colors, cmap='viridis',
            fmt="d", linewidths=0.5, center=0)
plt.title('Correlation matrix of disposable income of regions')
plt.xlabel('Year')
plt.ylabel('Region')
ax.tick_params(axis='x', labelrotation=90)
plt.show()

unemprate = pd.read_csv("cleaned-data/6_unemployment rate.csv")
unemprate.head()

unemprate.describe()

unemprate_long = pd.melt(unemprate, id_vars="Region", var_name="Year",
                      value_name="unemp_rate")
unemprate_long.sort_values(by="Year", inplace=True)

unemprate_long.head()

from scripts import generate_eda as eda

configs = {
    "FONT_SIZE": 1, 
    "FIGSIZE": (20, 20)
}
plot_title = "Unemployment Rate by region"
exceptions = {
    "Beijing": "red",
    "Hainan": "blue",
}
fig = eda.line_plots(unemprate, "unemp_rate", plot_title, exceptions, configs, show=False)
ax = fig.axes[0]
ax.set_ylim(top=7)
ax.legend(loc="upper right")

<matplotlib.legend.Legend at 0x170d444d0>

unemprate_transpose = unemprate.T
unemprate_transpose.columns = unemprate_transpose.iloc[0]
unemprate_transpose = unemprate_transpose[1:].astype(float)
unemprate_corr = unemprate_transpose.corr(numeric_only=True) # correlation matrix
unemprate_corr.head()

plt.figure(figsize=(10, 10))
unemprate_corr_colors = (unemprate_corr - unemprate_corr.min().min() )
unemprate_corr_colors = unemprate_corr_colors * (1/unemprate_corr_colors.max().max())
ax = sns.heatmap(unemprate_corr_colors, cmap='viridis',
            fmt="d", linewidths=0.5, center=0)
plt.title('Correlation matrix of unemployment rate of regions')
plt.xlabel('Year')
plt.ylabel('Region')
ax.tick_params(axis='x', labelrotation=90)
plt.show()

cpi = pd.read_csv("cleaned-data/9_cpi.csv")
cpi.head()

inflation_rate = (cpi.set_index("Region").T.sort_index() \
                    .pct_change()*100) \
                    .iloc[1:,:] # select from 2006 onwards (2005 are all NaN)
inflation_rate.head()

pop_long = population.melt(id_vars="Region", var_name="Year", value_name="Population")
ngdp_long = NGDP.melt(id_vars="Region", var_name="Year", value_name="NGDP")
infl_rate_long = inflation_rate.reset_index(names="Year") \
        .melt(id_vars = "Year", 
              value_vars = inflation_rate.columns.tolist(), 
              var_name = "Region", 
              value_name = "inflation_rate")

national_ngdp_per_cap = ngdp_long \
        .merge(right=pop_long, on=["Region", "Year"], how="left") \
        .groupby("Year").agg({
            "NGDP": "sum",
            "Population": "sum"
        })
national_ngdp_per_cap["NGDP_per_cap"] = national_ngdp_per_cap["NGDP"] / national_ngdp_per_cap["Population"]

total_pop = pop_long.groupby("Year", as_index=False).agg({"Population": "sum"})
weights_df = pop_long.merge(right=total_pop, on=["Year"], how="left")
weights_df["weights"] = weights_df["Population_x"] / weights_df["Population_y"]

dfs_long = [birthrate_long, wage_long, income_long, \
            unemprate_long, ngdp_long, infl_rate_long]

def all_same(items):
    if isinstance(items[0], pd.DataFrame):
        return all((x.equals(items[0]) for x in items))
    return all(x == items[0] for x in items)


# Test to see if the dataframes has the same shape aka # rows and # cols
# print(all_same(list(map(lambda df: df.shape, dfs_long))))

# Test to see if the column Region and Year are the same for all dataframes
# print(all_same(list(map(lambda df: df[["Region", "Year"]], dfs_long))))

from functools import reduce

# join all dataframe together on column "Region" and "Year"
df_merged = reduce(lambda  left,right: 
    pd.merge(left,right,on=['Region', 'Year'], how='left'), dfs_long)

weights_df

df_aggs = df_merged.merge(right=weights_df, on=["Region", "Year"], how="left")
df_aggs = df_aggs.assign(
    agg_wage = lambda x: x["wage"] * x["weights"],
    agg_birth_rate = lambda x: x["birth_rate"] * x["weights"],
    agg_disposable_income = lambda x: x["disposable_income"] * x["weights"],
    agg_unemp_rate = lambda x: x["unemp_rate"] * x["weights"],
    agg_inflation_rate = lambda x: x["inflation_rate"] * x["weights"]

)

df_aggs = df_aggs \
    .filter(like="agg_") \
    .assign(Year=df_aggs.Year) \
    .groupby("Year", as_index=False) \
    .sum()
df_aggs.head()

# Plotting disposable income for a province vs national weighted average

reg = "Zhejiang"
test = income_long[income_long["Region"] == reg]

plt.plot(test["Year"], test["disposable_income"], label=reg, marker="o")
plt.plot(test["Year"], df_aggs["agg_disposable_income"], label="National (Weighted) Average", marker="o")
plt.legend()
plt.title(f"Disposable income of {reg} vs. National Average of Disposable Income")
plt.xlabel("Year")
plt.tick_params(axis='both', which='major', labelsize=7)

#Create indicator variable of 0 for Year < 2016 and 1 for Year >= 2016 to denote the implementation of universal Two-child policy in 2016.

df_merged["indicator2016"] = (pd.to_datetime(df_merged["Year"]).dt.year >= 2016).astype(int)

from statsmodels.api import OLS
import statsmodels.formula.api as smf

# scaling does not really affect the result of statistical inference
# https://stats.stackexchange.com/questions/29781/when-conducting-multiple-regression-when-should-you-center-your-predictor-varia
# we won't scale the covariates here so that it's easier to interpret the slope coefficients.

# change the group to Region

X = df_merged.drop(columns=["Year", "birth_rate"])
y = df_merged["birth_rate"]

linear_model = smf.ols(formula="birth_rate ~ C(Region) + disposable_income + unemp_rate + NGDP + inflation_rate + C(indicator2016)*wage", 
                       data=df_merged)
results = linear_model.fit(cov_type="cluster", cov_kwds={"groups": X["Region"]})

results.summary().tables[0]

/Users/tien/opt/anaconda3/envs/econ323/lib/python3.12/site-packages/statsmodels/base/model.py:1896: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 37, but rank is 7
  warnings.warn('covariance of constraints does not have full '

results.pvalues[(results.pvalues < 0.05) & (~results.pvalues.index.str.contains("C(Region)", regex=False))]

Intercept         1.028770e-14
NGDP              9.138713e-03
inflation_rate    4.416226e-02
dtype: float64

results.summary().tables[1]

results.summary2().tables[1].iloc[[0,-6,-5,-4,-3,-2,-1],:]\
        .assign(isSignificant = lambda x: x["P>|z|"] < 0.05) # just the coefficients for the one we're intered in

from statsmodels.stats.outliers_influence import variance_inflation_factor

result_dict = {"predictor": X.columns[1:], "VIF" : []}
for idx, col in enumerate(X.columns[1:]):
    vif = variance_inflation_factor(X.iloc[:,1:7], idx)
    result_dict["VIF"].append(vif)
    
pd.DataFrame(result_dict)

refit_lm = smf.ols(formula="birth_rate ~ C(Region) + unemp_rate + disposable_income + NGDP + inflation_rate + indicator2016", 
                   data=df_merged)
refit_lm_results = refit_lm.fit(cov_type="cluster", cov_kwds={"groups": X["Region"]})
refit_lm_results.summary().tables[0]

/Users/tien/opt/anaconda3/envs/econ323/lib/python3.12/site-packages/statsmodels/base/model.py:1896: ValueWarning: covariance of constraints does not have full rank. The number of constraints is 35, but rank is 5
  warnings.warn('covariance of constraints does not have full '

refit_lm_results.summary2().tables[1].iloc[[0,-5,-4,-3,-2,-1],:]\
        .assign(isSignificant = lambda x: x["P>|z|"] < 0.05) # ignore the dummy region variables

# source codes of script is from
# https://www.statsmodels.org/devel/examples/notebooks/generated/linear_regression_diagnostics_plots.html
from scripts.regression_plots import LinearRegDiagnostic

reg_results = LinearRegDiagnostic(refit_lm_results)

reg_results.residual_plot()

<Axes: title={'center': 'Residuals vs Fitted'}, xlabel='Fitted values', ylabel='Residuals'>

reg_results.qq_plot()

<Axes: title={'center': 'Normal Q-Q'}, xlabel='Theoretical Quantiles', ylabel='Standardized Residuals'>

	Region	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006
0	Beijing	8.12	8.24	9.06	9.32	7.96	9.75	8.93	9.05	8.29	7.48	8.06	8.17	8.32	6.26
1	Tianjin	6.73	6.67	7.65	7.37	5.84	8.19	8.28	8.75	8.58	8.18	8.30	8.13	7.91	7.67
2	Hebei	10.83	11.26	13.20	12.42	11.35	13.18	13.04	12.88	13.02	13.22	12.93	13.04	13.33	12.82
3	Shanxi	9.12	9.63	11.06	10.29	9.98	10.92	10.81	10.70	10.47	10.68	10.87	11.32	11.30	11.48
4	Inner Mongolia Autonomous Region	8.23	8.35	9.47	9.03	7.72	9.31	8.98	9.17	8.94	9.30	9.57	9.81	10.21	9.87

	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006
count	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000
mean	10.560968	11.105161	12.120968	11.800968	11.150968	11.632581	11.260323	11.422581	11.075484	11.290323	11.380000	11.420645	11.514516	11.402581
std	2.516930	2.649351	2.973572	2.947632	2.942027	2.616709	2.678159	2.527621	2.643896	2.715654	2.666403	2.627924	2.657100	2.943015
min	5.730000	5.980000	6.220000	5.550000	5.840000	6.490000	5.360000	5.730000	5.710000	6.680000	6.060000	6.320000	6.890000	6.260000
25%	8.675000	9.475000	10.385000	10.025000	9.515000	9.940000	9.670000	9.725000	9.530000	9.235000	9.515000	9.440000	9.290000	9.250000
50%	10.700000	11.260000	12.950000	12.180000	11.350000	12.210000	11.410000	11.870000	11.410000	11.270000	11.700000	11.420000	11.300000	11.600000
75%	12.610000	13.230000	14.025000	13.630000	13.100000	13.380000	13.085000	13.265000	13.330000	13.470000	13.485000	13.355000	13.305000	13.500000
max	14.600000	15.220000	17.540000	17.890000	15.750000	16.440000	15.840000	15.480000	15.390000	15.990000	15.990000	16.050000	16.790000	17.400000

	Region	Year	birth_rate
433	Xinjiang Uygur Autonomous Region	2006	15.79
403	Beijing	2006	6.26
404	Tianjin	2006	7.67
405	Hebei	2006	12.82
406	Shanxi	2006	11.48

Region	Beijing	Tianjin	Hebei	Shanxi	Inner Mongolia Autonomous Region	Liaoning	Jilin	Heilongjiang	Shanghai	Jiangsu	...	Chongqing	Sichuan	Guizhou	Yunnan	Tibet Autonomous Region	Shaanxi	Gansu	Qinghai	Ningxia Hui Autonomous Region	Xinjiang Uygur Autonomous Region
Region
Beijing	1.000000	0.193959	0.143385	-0.103150	-0.102938	0.003017	-0.593789	-0.209550	0.492164	0.272591	...	0.599820	0.512758	-0.528546	-0.176018	-0.411343	0.272706	-0.141091	-0.410147	-0.666636	0.090714
Tianjin	0.193959	1.000000	0.842247	0.634058	0.689576	-0.208448	0.169470	0.731270	0.412935	0.583376	...	-0.463204	-0.524597	-0.077743	-0.322821	0.117596	-0.413039	0.412658	0.101425	0.275393	0.510352
Hebei	0.143385	0.842247	1.000000	0.889761	0.830515	0.055003	0.371350	0.777156	0.452846	0.652599	...	-0.396970	-0.541404	-0.062497	0.028154	0.479961	-0.308488	0.707699	0.478559	0.346572	0.834689
Shanxi	-0.103150	0.634058	0.889761	1.000000	0.881077	0.120870	0.453291	0.828652	0.488147	0.379026	...	-0.355843	-0.575026	0.004494	0.113473	0.713772	-0.191598	0.868336	0.635482	0.528506	0.850567
Inner Mongolia Autonomous Region	-0.102938	0.689576	0.830515	0.881077	1.000000	0.327197	0.570785	0.827961	0.503831	0.457959	...	-0.417578	-0.576594	0.209701	0.132142	0.560587	-0.056030	0.725645	0.501802	0.732715	0.603807

	Region	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006
0	Beijing	35445.1	33106.0	29883.0	27041.2	24779.1	22926.0	21134.6	19024.7	17188.8	14964.0	12900.9	11813.1	10425.5	8387.0
1	Tianjin	14055.5	13362.9	12450.6	11477.2	10879.5	10640.6	9945.4	9043.0	8112.5	6830.8	5709.6	5182.4	4158.4	3538.2
2	Hebei	34978.6	32494.6	30640.8	28474.1	26398.4	25208.9	24259.6	23077.5	21384.7	18003.6	15306.9	14200.1	12152.9	10043.0
3	Shanxi	16961.6	15958.1	14484.3	11946.4	11836.4	12094.7	11987.2	11683.1	10894.4	8903.9	7147.6	7223.0	5935.6	4713.6
4	Inner Mongolia Autonomous Region	17212.5	16140.8	14898.1	13789.3	12949.0	12158.2	11392.4	10470.1	9458.1	8199.9	7104.2	6242.4	5166.9	4161.8

Economic Development and Fertility in China: Assessing the Impact of the Universal Two-Child Policy¶

1. Introduction¶

1.1 Background¶

1.2 Research Question¶

1.3 General Strategy¶

1.4 Dataset¶

2. Exploratory Data Analysis¶

2.1 Data Analysis and visualizations of birth rate and economic factors¶

2.1.1 Data Analysis of birth rate¶

Line chart of birth rate trend for each province¶

Correlation matrix of provincial birth rate¶

2.1.2 Data Analysis of Nominal GDP per capita¶

Line chart of Nominal GDP per capita trend for each province¶

Correlation matrix of provincial Nominal GDP per capita¶

2.1.3 Data Analysis of average wage¶

Line chart of average wage for each province¶

Correlation matrix of provincial total wages of employed persons in urban units¶

2.1.4 Data Analysis of Disposable Income per capita¶

Line chart of disposable income trend for each province¶

Correlation matrix of provincial disposable income per capita¶

2.1.5 Data Analysis of Unemployment rate¶

Line chart of unemployment rate trend for each province¶

Correlation matrix of provincial unemployment rate¶

2.1.6 Deriving the Inflation Rate using CPI data¶

2.2 Deriving Aggregated (National Average) data¶

3. Model Fitting¶

Using variance inflation factor to identify multicollinearity¶

Interpretation of coefficients:¶

4. Conclusion¶

References¶

	Region	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006
0	Beijing	2190	2192	2194	2195	2188	2171	2125	2078	2024	1962	1860	1771	1676	1601
1	Tianjin	1385	1383	1410	1443	1439	1429	1410	1378	1341	1299	1228	1176	1115	1075
2	Hebei	7447	7426	7409	7375	7345	7323	7288	7262	7232	7194	7034	6989	6943	6898
3	Shanxi	3497	3502	3510	3514	3519	3528	3535	3548	3562	3574	3427	3411	3393	3375
4	Inner Mongolia Autonomous Region	2415	2422	2433	2436	2440	2449	2455	2464	2470	2472	2458	2444	2429	2415

	Region	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006
0	Beijing	16.184977	15.103102	16.184977	12.319453	11.325000	10.560111	9.945694	9.155294	8.492490	7.626911	6.935968	6.670299	6.220465	5.238601
1	Tianjin	10.148375	9.662256	10.148375	7.953708	7.560459	7.446186	7.053475	6.562409	6.049590	5.258507	4.649511	4.406803	3.729507	3.291349
2	Hebei	4.697006	4.375788	4.697006	3.860895	3.594064	3.442428	3.328705	3.177844	2.956955	2.502585	2.176130	2.031779	1.750382	1.455929
3	Shanxi	4.850329	4.556853	4.850329	3.399659	3.363569	3.428203	3.391004	3.292869	3.058506	2.491298	2.085673	2.117561	1.749366	1.396622
4	Inner Mongolia Autonomous Region	7.127329	6.664244	7.127329	5.660632	5.306967	4.964557	4.640489	4.249229	3.829190	3.317112	2.890236	2.554173	2.127172	1.723313

	Region	Year	ngdp_per_capita
433	Xinjiang Uygur Autonomous Region	2006	1.442585
403	Beijing	2006	5.238601
404	Tianjin	2006	3.291349
405	Hebei	2006	1.455929
406	Shanxi	2006	1.396622

	Region	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006
0	Beijing	145766	131700	119928	111390	102268	93006	84742	68174	59427	49722	44961	42328	34692	29992
1	Tianjin	100731	94534	86305	80090	72773	67773	61514	66928	57367	41973	37444	35101	30378	25586
2	Hebei	68717	63036	55334	50921	45114	41501	38658	38105	31546	26191	22983	20283	16489	13823
3	Shanxi	65917	60061	53705	51803	48969	46407	44236	40029	33225	26672	23273	20731	17301	14328
4	Inner Mongolia Autonomous Region	73835	66679	61067	57135	53748	50723	46557	42297	36324	28760	24834	21197	17855	14977

	Region	Year	wage
433	Xinjiang Uygur Autonomous Region	2006	17062
403	Beijing	2006	29992
404	Tianjin	2006	25586
405	Hebei	2006	13823
406	Shanxi	2006	14328

	Region	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006
0	Beijing	67756	62361	57230	52530	48458	44489	40830	36817	33176	29228	26571	24371	21458	19296
1	Tianjin	42404	39506	37022	34074	31291	28832	26359	24030	21714	19266	16967	15444	13116	11526
2	Hebei	25665	23446	21484	19725	18118	16647	15190	13647	12059	10428	9267	8365	7233	6295
3	Shanxi	23828	21990	20420	19049	17854	16538	15120	13592	11959	10149	8911	8333	7282	6235
4	Inner Mongolia Autonomous Region	30555	28376	26212	24127	22310	20559	18693	16800	14715	12538	11015	9923	8340	6876

	Region	Year	disposable_income
433	Xinjiang Uygur Autonomous Region	2006	5290
403	Beijing	2006	19296
404	Tianjin	2006	11526
405	Hebei	2006	6295
406	Shanxi	2006	6235

	Region	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006	2005
0	Beijing	102.3	102.5	101.9	101.4	101.8	101.6	103.3	103.3	105.6	102.4	98.5	105.1	102.4	100.9	101.5
1	Tianjin	102.7	102.0	102.1	102.1	101.7	101.9	103.1	102.7	104.9	103.5	99.0	105.4	104.2	101.5	101.5
2	Hebei	103.0	102.4	101.7	101.5	100.9	101.7	103.0	102.6	105.7	103.1	99.3	106.2	104.7	101.7	101.8
3	Shanxi	102.7	101.8	101.1	101.1	100.6	101.7	103.1	102.5	105.2	103.0	99.6	107.2	104.6	102.0	102.3
4	Inner Mongolia Autonomous Region	102.4	101.8	101.7	101.2	101.1	101.6	103.2	103.1	105.6	103.2	99.7	105.7	104.6	101.5	102.4

Region	Beijing	Tianjin	Hebei	Shanxi	Inner Mongolia Autonomous Region	Liaoning	Jilin	Heilongjiang	Shanghai	Jiangsu	...	Chongqing	Sichuan	Guizhou	Yunnan	Tibet Autonomous Region	Shaanxi	Gansu	Qinghai	Ningxia Hui Autonomous Region	Xinjiang Uygur Autonomous Region
2006	-0.591133	0.000000	-0.098232	-0.293255	-0.878906	-0.197239	-0.098522	0.691700	0.198020	-0.489716	...	1.587302	0.589971	0.693069	0.493097	0.492611	0.296443	-0.393314	0.793651	0.394089	0.595829
2007	1.486620	2.660099	2.949853	2.549020	3.054187	3.853755	3.846154	3.238469	1.976285	2.657480	...	2.246094	3.519062	4.621436	3.925417	1.372549	3.645320	3.849951	5.019685	3.532875	4.146101
2008	2.636719	1.151631	1.432665	2.485660	1.051625	-0.475737	-0.189934	0.380228	2.519380	1.054650	...	0.859599	-0.755430	1.127820	-0.188857	2.224371	1.140684	2.851711	3.186504	2.843602	2.464455
2009	-6.279734	-6.072106	-6.497175	-7.089552	-5.676443	-4.397706	-4.757374	-5.113636	-5.860113	-5.502846	...	-6.818182	-4.091342	-8.271375	-5.014191	-4.068117	-5.545113	-6.377079	-6.811989	-7.188940	-6.845513
2010	3.959391	4.545455	3.826788	3.413655	3.510532	3.000000	3.596404	3.692615	3.514056	4.216867	...	4.878049	2.380952	4.255319	3.286853	0.788955	3.482587	2.764067	2.729045	3.376365	3.574975

	Year	agg_wage	agg_birth_rate	agg_disposable_income	agg_unemp_rate	agg_inflation_rate
0	2006	13700.185264	11.111821	7396.608888	3.819470	-0.276876
1	2007	16500.054819	11.177068	8621.689385	3.669165	3.389946
2	2008	19512.983596	11.179822	9958.024849	3.728662	1.008213
3	2009	22148.162908	11.168745	10956.793587	3.677729	-6.172374
4	2010	25755.778123	11.091313	12495.516707	3.584803	3.894455

	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006
count	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000
mean	6.874240	6.428165	6.874240	5.309175	4.937917	4.669268	4.344792	3.970445	3.617963	3.091418	2.650944	2.464197	2.116261	1.757959
std	3.101732	2.936479	3.101732	2.431799	2.215270	2.052659	1.928088	1.814868	1.742264	1.615138	1.494381	1.435141	1.338889	1.156927
min	3.474811	3.222306	3.474811	2.741230	2.598732	2.494724	2.195237	1.879621	1.590822	1.298936	1.090387	0.974555	0.784003	0.613577
25%	4.888444	4.543684	4.888444	3.870482	3.539666	3.435315	3.251177	2.958459	2.618698	2.154314	1.752913	1.597454	1.313620	1.075172
50%	5.551886	5.155883	5.551886	4.273981	3.951534	3.772332	3.533696	3.233687	2.956955	2.482938	2.026720	1.930554	1.602066	1.322993
75%	7.534667	6.963664	7.534667	5.843668	5.455470	5.070716	4.749154	4.337084	3.935542	3.427564	3.035827	2.845671	2.409991	2.001029
max	16.184977	15.103102	16.184977	12.319453	11.325000	10.560111	9.945694	9.155294	8.493081	7.779158	7.123258	6.789771	6.239680	5.396589

Region	Beijing	Tianjin	Hebei	Shanxi	Inner Mongolia Autonomous Region	Liaoning	Jilin	Heilongjiang	Shanghai	Jiangsu	...	Chongqing	Sichuan	Guizhou	Yunnan	Tibet Autonomous Region	Shaanxi	Gansu	Qinghai	Ningxia Hui Autonomous Region	Xinjiang Uygur Autonomous Region
Region
Beijing	1.000000	0.988898	0.980450	0.973583	0.989211	0.972862	0.964087	0.962535	0.999198	0.993001	...	0.993246	0.995490	0.998196	0.997983	0.998642	0.987071	0.982211	0.995864	0.982703	0.982323
Tianjin	0.988898	1.000000	0.997061	0.990521	0.998316	0.995093	0.989899	0.989514	0.987297	0.997053	...	0.995619	0.998065	0.988440	0.993177	0.989573	0.998935	0.996766	0.996124	0.998856	0.997218
Hebei	0.980450	0.997061	1.000000	0.987707	0.997289	0.996483	0.994455	0.992583	0.979267	0.994595	...	0.992944	0.992875	0.980253	0.985762	0.980809	0.996496	0.995992	0.992091	0.998866	0.993042
Shanxi	0.973583	0.990521	0.987707	1.000000	0.982382	0.986889	0.974623	0.980166	0.969812	0.979356	...	0.975697	0.985911	0.966769	0.977098	0.969330	0.985785	0.982387	0.978491	0.991650	0.988557
Inner Mongolia Autonomous Region	0.989211	0.998316	0.997289	0.982382	1.000000	0.994090	0.991621	0.988354	0.988459	0.999074	...	0.998150	0.997112	0.990666	0.993352	0.991032	0.998518	0.997004	0.997536	0.997278	0.994135

	2019	2018	2017	2016	2015	2014	2013	2012	2011	2010	2009	2008	2007	2006
count	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000	31.000000
mean	80879.903226	73351.709677	67011.870968	61764.806452	55217.645161	50658.032258	45816.354839	40260.548387	34216.032258	27423.225806	23770.483871	21135.032258	17978.032258	14839.741935
std	19887.954261	18655.046113	17475.427729	16265.241415	13864.212743	12598.210604	11087.171017	10699.507212	9277.699232	6926.298827	6540.602479	6349.368454	5319.142954	4571.682338
min	60780.000000	55495.000000	49505.000000	45403.000000	42179.000000	38301.000000	36386.000000	27959.000000	22636.000000	17629.000000	14758.000000	12645.000000	11072.000000	9315.000000
25%	70413.500000	63532.000000	57726.500000	52460.500000	47038.500000	42839.500000	38814.500000	32730.500000	27628.000000	23070.500000	19578.500000	17263.500000	14648.500000	12034.500000
50%	74378.000000	67727.000000	61663.000000	57270.000000	51825.000000	47446.000000	43073.000000	37251.000000	31516.000000	25056.000000	20955.000000	18828.000000	16014.000000	13427.000000
75%	81808.000000	73748.000000	66434.000000	60816.500000	56336.000000	51058.000000	46996.500000	42902.500000	37268.500000	28966.000000	25139.500000	22312.500000	19826.500000	16163.500000
max	145766.000000	131700.000000	119935.000000	111390.000000	102268.000000	93006.000000	84742.000000	68174.000000	59427.000000	49722.000000	44961.000000	42328.000000	34692.000000	29992.000000

Region	Beijing	Tianjin	Hebei	Shanxi	Inner Mongolia Autonomous Region	Liaoning	Jilin	Heilongjiang	Shanghai	Jiangsu	...	Chongqing	Sichuan	Guizhou	Yunnan	Tibet Autonomous Region	Shaanxi	Gansu	Qinghai	Ningxia Hui Autonomous Region	Xinjiang Uygur Autonomous Region
Region
Beijing	1.000000	0.979892	0.991177	0.987305	0.991348	0.967923	0.999348	0.991762	0.998628	0.993551	...	0.994499	0.999503	0.998678	0.992230	0.969215	0.997482	0.998548	0.994097	0.985419	0.985845
Tianjin	0.979892	1.000000	0.993484	0.990703	0.992623	0.995953	0.984651	0.995245	0.981332	0.957674	...	0.993811	0.976924	0.982793	0.986880	0.928419	0.986643	0.979538	0.992470	0.996142	0.998385
Hebei	0.991177	0.993484	1.000000	0.989451	0.992787	0.989155	0.994315	0.997669	0.988396	0.971477	...	0.994524	0.988296	0.992192	0.997632	0.953763	0.992516	0.992792	0.998260	0.996950	0.994988
Shanxi	0.987305	0.990703	0.989451	1.000000	0.998915	0.985975	0.988925	0.990817	0.987221	0.975816	...	0.995330	0.985500	0.985020	0.980997	0.926149	0.995144	0.982658	0.992115	0.991557	0.993083
Inner Mongolia Autonomous Region	0.991348	0.992623	0.992787	0.998915	1.000000	0.987348	0.993115	0.994762	0.991559	0.979646	...	0.998268	0.990009	0.990112	0.986419	0.936608	0.997687	0.987784	0.995565	0.993469	0.995345

Dep. Variable:	birth_rate	R-squared:	0.910
Model:	OLS	Adj. R-squared:	0.902
Method:	Least Squares	F-statistic:	1.243e+04
Date:	Fri, 19 Apr 2024	Prob (F-statistic):	3.94e-50
Time:	13:06:54	Log-Likelihood:	-524.91
No. Observations:	434	AIC:	1126.
Df Residuals:	396	BIC:	1281.
Df Model:	37
Covariance Type:	cluster

	coef	std err	z	P>\|z\|	[0.025	0.975]
Intercept	11.5863	1.498	7.736	0.000	8.651	14.522
C(Region)[T.Beijing]	-2.9585	0.856	-3.456	0.001	-4.636	-1.281
C(Region)[T.Chongqing]	-2.0049	0.155	-12.896	0.000	-2.310	-1.700
C(Region)[T.Fujian]	-0.0629	0.205	-0.307	0.759	-0.464	0.338
C(Region)[T.Gansu]	0.1577	0.312	0.506	0.613	-0.454	0.769
C(Region)[T.Guangdong]	-2.4935	0.690	-3.613	0.000	-3.846	-1.141
C(Region)[T.Guangxi Zhuang Autonomous Region]	1.6766	0.142	11.774	0.000	1.398	1.956
C(Region)[T.Guizhou]	1.1122	0.172	6.455	0.000	0.775	1.450
C(Region)[T.Hainan]	2.6653	0.423	6.295	0.000	1.835	3.495
C(Region)[T.Hebei]	-0.4136	0.100	-4.153	0.000	-0.609	-0.218
C(Region)[T.Heilongjiang]	-5.7115	0.333	-17.175	0.000	-6.363	-5.060
C(Region)[T.Henan]	-1.3719	0.234	-5.875	0.000	-1.830	-0.914
C(Region)[T.Hubei]	-2.2896	0.065	-35.295	0.000	-2.417	-2.162
C(Region)[T.Hunan]	-0.2525	0.219	-1.154	0.248	-0.681	0.176
C(Region)[T.Inner Mongolia Autonomous Region]	-3.2697	0.265	-12.357	0.000	-3.788	-2.751
C(Region)[T.Jiangsu]	-4.7025	0.529	-8.884	0.000	-5.740	-3.665
C(Region)[T.Jiangxi]	0.9558	0.144	6.653	0.000	0.674	1.237
C(Region)[T.Jilin]	-5.8401	0.221	-26.448	0.000	-6.273	-5.407
C(Region)[T.Liaoning]	-6.3465	0.244	-26.055	0.000	-6.824	-5.869
C(Region)[T.Ningxia Hui Autonomous Region]	1.6209	0.406	3.994	0.000	0.826	2.416
C(Region)[T.Qinghai]	2.5575	0.284	9.017	0.000	2.002	3.113
C(Region)[T.Shaanxi]	-2.3614	0.083	-28.507	0.000	-2.524	-2.199
C(Region)[T.Shandong]	-0.9865	0.367	-2.692	0.007	-1.705	-0.268
C(Region)[T.Shanghai]	-4.0453	0.744	-5.440	0.000	-5.503	-2.588
C(Region)[T.Shanxi]	-1.7979	0.148	-12.153	0.000	-2.088	-1.508
C(Region)[T.Sichuan]	-3.2806	0.265	-12.398	0.000	-3.799	-2.762
C(Region)[T.Tianjin]	-4.1660	0.432	-9.636	0.000	-5.013	-3.319
C(Region)[T.Tibet Autonomous Region]	3.9187	0.356	11.014	0.000	3.221	4.616
C(Region)[T.Xinjiang Uygur Autonomous Region]	2.7242	0.210	12.953	0.000	2.312	3.136
C(Region)[T.Yunnan]	0.2271	0.197	1.154	0.248	-0.159	0.613
C(Region)[T.Zhejiang]	-2.5289	0.330	-7.654	0.000	-3.177	-1.881
C(indicator2016)[T.1]	0.5891	0.533	1.104	0.269	-0.456	1.635
disposable_income	-2.256e-05	3.44e-05	-0.656	0.512	-9e-05	4.49e-05
unemp_rate	0.2707	0.372	0.727	0.467	-0.459	1.001
NGDP	4.445e-05	1.71e-05	2.607	0.009	1.1e-05	7.79e-05
inflation_rate	-0.0187	0.009	-2.013	0.044	-0.037	-0.000
wage	-7.676e-06	1.02e-05	-0.755	0.450	-2.76e-05	1.23e-05
C(indicator2016)[T.1]:wage	-6.265e-06	7.59e-06	-0.826	0.409	-2.11e-05	8.6e-06

	Coef.	Std.Err.	z	P>\|z\|	[0.025	0.975]	isSignificant
Intercept	11.586309	1.497781	7.735649	1.028770e-14	8.650712	14.521906	True
disposable_income	-0.000023	0.000034	-0.655510	5.121392e-01	-0.000090	0.000045	False
unemp_rate	0.270661	0.372439	0.726726	4.673941e-01	-0.459306	1.000629	False
NGDP	0.000044	0.000017	2.606821	9.138713e-03	0.000011	0.000078	True
inflation_rate	-0.018656	0.009270	-2.012547	4.416226e-02	-0.036825	-0.000487	True
wage	-0.000008	0.000010	-0.754693	4.504331e-01	-0.000028	0.000012	False
C(indicator2016)[T.1]:wage	-0.000006	0.000008	-0.825970	4.088213e-01	-0.000021	0.000009	False

	predictor	VIF
0	wage	26.005184
1	disposable_income	22.025988
2	unemp_rate	3.597108
3	NGDP	3.273778
4	inflation_rate	1.017998
5	indicator2016	2.930799

	Coef.	Std.Err.	z	P>\|z\|	[0.025	0.975]	isSignificant
Intercept	11.571674	1.271117	9.103544	8.743089e-20	9.080330	14.063019	True
unemp_rate	0.288699	0.323049	0.893669	3.714989e-01	-0.344466	0.921865	False
disposable_income	-0.000049	0.000024	-2.095450	3.613098e-02	-0.000095	-0.000003	True
NGDP	0.000047	0.000018	2.712801	6.671707e-03	0.000013	0.000082	True
inflation_rate	-0.018930	0.009562	-1.979650	4.774283e-02	-0.037672	-0.000188	True
indicator2016	0.171696	0.209848	0.818191	4.132481e-01	-0.239599	0.582992	False