urbansim_templates icon indicating copy to clipboard operation
urbansim_templates copied to clipboard

Small Multinomial Logit

Open jdcaicedo251 opened this issue 4 years ago • 0 comments

There seems to be a problem with SmallMultinomialLogitStep(). I created a random table to simulate a common choice scenario. Each observation has 2 attributes: age and income. There are 4 choices in the choice set (1, 2, 3, 4). I ran a model based on this table with the same specification in pylogit, statsmodels and urbansim templates. Pylogit and statsmodels estimate the same coefficients.

# Importing libraries
import pandas as pd
import numpy as np 
import orca
import random 
from collections import OrderedDict
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import SmallMultinomialLogitStep
from statsmodels.discrete.discrete_model import MNLogit
import pylogit as pl

# Creating the table
# Seed the generator so the synthetic data, and therefore every set of
# estimates compared in this report, can be reproduced exactly.
random.seed(0)

N_OBS = 1000  # number of simulated observations

# 'interc' is a constant column of ones; 'age' is uniform on 1..20.
X = pd.DataFrame({'interc': 1,
                  'age': [random.randint(1, 20) for _ in range(N_OBS)]})

# One-hot encode a random income category (1..5).  dtype=int keeps the
# dummies numeric; recent pandas versions would otherwise return bool
# columns, which do not mix cleanly with the integer columns downstream.
income_dummies = pd.get_dummies([random.randint(1, 5) for _ in range(N_OBS)],
                                prefix='income', prefix_sep='_', dtype=int)

x_1 = pd.concat([X, income_dummies], axis=1)

# Observed choice: one of the four alternatives, drawn uniformly at random.
x_1['y'] = [random.randint(1, 4) for _ in range(N_OBS)]

x_1.head()

Urbansim Template


## Adding the table to orca
# Register the DataFrame under the name 'school_trip'; the model step refers
# to the table by this name through its `tables` attribute.
orca.add_table('school_trip', x_1)

## Creating the model specification.
# Every variable gets a separate coefficient for alternatives 2, 3 and 4.
# `example_specification` maps each variable to those alternatives, and
# `example_names` holds the matching coefficient labels in the same order.
example_specification = OrderedDict()
example_names = OrderedDict()

# (variable, prefix of its coefficient labels), in specification order.
label_prefixes = [
    ("intercept", "ASC "),
    ("age", "age "),
    ("income_1", "income_1_"),
    ("income_2", "income_2_"),
    ("income_3", "income_3_"),
    ("income_4", "income_4_"),
]

for variable, prefix in label_prefixes:
    # Build a fresh list per variable so the entries never share state.
    example_specification[variable] = [2, 3, 4]
    example_names[variable] = [prefix + str(alt) for alt in (2, 3, 4)]

## Model estimation
# Configure the template step attribute by attribute, then estimate it.
# NOTE(review): `example_names` is only handed to pylogit (names=...), not
# to this step -- confirm whether the step should receive the labels too.
m = SmallMultinomialLogitStep()
m.name = 'STOD_choice'
m.tables = ['school_trip']  # name under which the table was added to orca
m.choice_column = 'y'  # column holding the chosen alternative (1..4)
m.model_expression = example_specification  # same dict that is passed to pylogit
m.fit()

Statsmodels MNLogit

# Fit the same model with statsmodels and display its summary.
# 'y' is the outcome; 'interc' acts as the constant, and 'income_5' is left
# out so the remaining income dummies are not collinear with that constant.
# (The original ended in `.add_table_params`, an uncalled method reference,
# which evaluates to a bound method instead of the summary itself.)
MNLogit(x_1.y, x_1.drop(['y', 'income_5'], axis=1)).fit().summary()

Pylogit


## Getting table in long format
# pylogit expects one row per (observation, alternative) pair.  Build that
# table in one vectorized pass instead of creating a small DataFrame per
# observation: every row of x_1 is repeated once for each alternative.
alt_ids = [1, 2, 3, 4]
carried_columns = ['age', 'income_1', 'income_2', 'income_3', 'income_4', 'y']

n_obs = len(x_1)
n_alts = len(alt_ids)

# Positional row selection, so the result does not depend on x_1's index labels.
row_positions = np.repeat(np.arange(n_obs), n_alts)
x_3 = x_1[carried_columns].iloc[row_positions].copy()

# Observation ids are 1-based; alternative ids cycle 1..4 within each observation.
x_3.insert(0, 'obs_id', np.repeat(np.arange(1, n_obs + 1), n_alts))
x_3.insert(1, 'alt_id', np.tile(alt_ids, n_obs))

# Keep the 0..3 index repeated per observation, as concatenating the
# per-observation frames would have produced.
x_3.index = np.tile(np.arange(n_alts), n_obs)

# Flag the row whose alternative matches the observed choice.
x_3['chosen'] = (x_3['alt_id'] == x_3['y']).astype(int)
x_3.head()

## Model estimation
# One starting value per coefficient: the specification lists six variables,
# each with a separate coefficient for three alternatives.
n_coefs = 6 * 3

# Collect the model settings first, then create the pylogit MNL model.
mnl_settings = dict(
    data=x_3,
    obs_id_col='obs_id',
    alt_id_col='alt_id',
    choice_col='chosen',
    specification=example_specification,
    names=example_names,
    model_type="MNL",
)
example_mnl = pl.create_choice_model(**mnl_settings)

# Estimate from an all-zero starting vector and show the results table.
example_mnl.fit_mle(np.zeros(n_coefs))
example_mnl.get_statsmodels_summary()

`

jdcaicedo251 avatar Aug 29 '19 21:08 jdcaicedo251