econ_data
econ_data copied to clipboard
bd CPS: new variables NCHILDU5 NCHILDU18
The stock variables for number of children and age of children are rigid and missing for many years. However, because the CPS is a household survey and contains relationship variables, we can construct new children count variables that are flexible and available for all years.
To start, create NCHILDU18 and NCHILDU5, which capture the number of own children aged 0-17 and the number of own children aged 0-4, respectively.
These are added.
I removed these variables because some results using them didn't seem right and I didn't have time to fix them. The code to generate them is:
def family_rel(df):
    '''Return a copy of ``df`` with own-children counts added.

    Two ``int8`` columns are added:

    * ``NCHILDU18`` -- number of rows with ``AGE < 18`` linked to the person
    * ``NCHILDU5``  -- number of rows with ``AGE < 5`` linked to the person

    A child row is one with ``PARENT > 0``. Its ``PARENT`` value is matched
    against ``PULINENO`` of rows sharing the same ``HHID``, ``HHID2`` and
    ``STATE``. A person with no such match receives the count found for
    the line number stored in their ``SPOUSE`` column, and 0 if there is
    none either.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``HHID``, ``HHID2``, ``STATE``, ``PULINENO``, ``AGE``,
        ``PARENT`` and ``SPOUSE``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        ``df`` without ``PARENT`` and ``SPOUSE``, plus the two new columns,
        in the same row order and with the same index.
    '''
    hh_keys = ['HHID', 'HHID2', 'STATE']
    # Only the linking columns take part in the lookups, so merge a narrow
    # frame instead of copying every column of df on each merge.
    links = df[hh_keys + ['PULINENO', 'SPOUSE']]

    counts = {}
    for column, age_limit in (('NCHILDU18', 18), ('NCHILDU5', 5)):
        is_kid = (df['AGE'] < age_limit) & (df['PARENT'] > 0)
        # One row per (household, parent line number) holding the child
        # count; PARENT is renamed so it joins onto a person's PULINENO.
        per_parent = (df.loc[is_kid]
                        .groupby(hh_keys + ['PARENT'])['PULINENO'].count()
                        .reset_index()
                        .rename({'PARENT': 'PULINENO',
                                 'PULINENO': 'NUMCHILD1'}, axis=1))
        # Same table keyed so it joins onto a person's SPOUSE line number.
        per_spouse = per_parent.rename({'PULINENO': 'SPOUSE',
                                        'NUMCHILD1': 'NUMCHILD2'}, axis=1)
        # Explicit keys: an unrelated shared column can never silently
        # become part of the join. Left merges keep the row order of df.
        matched = (links
                   .merge(per_parent, how='left', on=hh_keys + ['PULINENO'])
                   .merge(per_spouse, how='left', on=hh_keys + ['SPOUSE']))
        own = matched['NUMCHILD1']
        via_spouse = matched['NUMCHILD2']
        # The own count wins; the spouse's count is only a fallback and the
        # two are never summed. Unmatched rows are NaN and fall through to 0.
        # NOTE(review): if some children point to one spouse and some to
        # the other, each spouse gets only a partial count -- confirm this
        # is the intended definition of "own children".
        counts[column] = np.where(
            own > 0, own, np.where(via_spouse > 0, via_spouse, 0)
        ).astype('int8')

    # drop() returns a new frame, so the caller's df is left untouched.
    return df.drop(['PARENT', 'SPOUSE'], axis=1).assign(**counts)
See #112
This was removed from the 1989-93 notebook:
def kids_counter(df):
    '''Count number of own kids under 5 and under 18.

    Returns a copy of ``df`` with two ``int8`` columns added:

    * ``NCHILDU18`` -- number of rows with ``AGE < 18`` linked to the person
    * ``NCHILDU5``  -- number of rows with ``AGE < 5`` linked to the person

    A child row is one with ``PARENT > 0``. Its ``PARENT`` value is matched
    against ``PULINENO`` of rows sharing the same ``HHID`` and ``STATE``.
    A person with no such match receives the count found for the line
    number stored in their ``SPOUSE`` column, and 0 if there is none
    either.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``HHID``, ``STATE``, ``PULINENO``, ``AGE``, ``PARENT``
        and ``SPOUSE``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        ``df`` without ``PARENT`` and ``SPOUSE``, plus the two new columns,
        in the same row order and with the same index.
    '''
    hh_keys = ['HHID', 'STATE']
    # Only the linking columns take part in the lookups, so merge a narrow
    # frame instead of copying every column of df on each merge.
    links = df[hh_keys + ['PULINENO', 'SPOUSE']]

    counts = {}
    for column, age_limit in (('NCHILDU18', 18), ('NCHILDU5', 5)):
        is_kid = (df['AGE'] < age_limit) & (df['PARENT'] > 0)
        # One row per (household, parent line number) holding the child
        # count; PARENT is renamed so it joins onto a person's PULINENO.
        per_parent = (df.loc[is_kid]
                        .groupby(hh_keys + ['PARENT'])['PULINENO'].count()
                        .reset_index()
                        .rename({'PARENT': 'PULINENO',
                                 'PULINENO': 'NUMCHILD1'}, axis=1))
        # Same table keyed so it joins onto a person's SPOUSE line number.
        per_spouse = per_parent.rename({'PULINENO': 'SPOUSE',
                                        'NUMCHILD1': 'NUMCHILD2'}, axis=1)
        # Explicit keys: an unrelated shared column can never silently
        # become part of the join. Left merges keep the row order of df.
        matched = (links
                   .merge(per_parent, how='left', on=hh_keys + ['PULINENO'])
                   .merge(per_spouse, how='left', on=hh_keys + ['SPOUSE']))
        own = matched['NUMCHILD1']
        via_spouse = matched['NUMCHILD2']
        # The own count wins; the spouse's count is only a fallback and the
        # two are never summed. Unmatched rows are NaN and fall through to 0.
        # NOTE(review): if some children point to one spouse and some to
        # the other, each spouse gets only a partial count -- confirm this
        # is the intended definition of "own kids".
        counts[column] = np.where(
            own > 0, own, np.where(via_spouse > 0, via_spouse, 0)
        ).astype('int8')

    # drop() returns a new frame, so the caller's df is left untouched.
    return df.drop(['PARENT', 'SPOUSE'], axis=1).assign(**counts)