ActivitySim · fscottfoti · Mar 9, 2015 · Feb 24, 2015 · Feb 24, 2015 · Feb 24, 2015
diff --git a/activitysim/activitysim.py b/activitysim/activitysim.py
@@ -14,13 +14,16 @@ def random_rows(df, n):
 
 def read_model_spec(fname,
                     description_name="Description",
-                    expression_name="Expression"):
+                    expression_name="Expression",
+                    stack=True):
     """
     Read in the excel file and reformat for machines
     """
     cfg = pd.read_csv(fname)
     # don't need description and set the expression to the index
-    cfg = cfg.drop(description_name, axis=1).set_index(expression_name).stack()
+    cfg = cfg.drop(description_name, axis=1).set_index(expression_name)
+    if stack:
+        cfg = cfg.stack()
     return cfg
 
 
@@ -93,7 +96,11 @@ def simple_simulate(choosers, alternatives, spec,
             if mult_by_alt_col:
                 expr = "({}) * df.{}".format(expr[0][1:], expr[1])
             else:
-                expr = expr[0][1:]
+                if isinstance(expr, tuple):
+                    expr = expr[0][1:]
+                else:
+                    # it's already a string, but need to remove the "@"
+                    expr = expr[1:]
             try:
                 s = eval(expr)
             except Exception as e:
@@ -103,7 +110,11 @@ def simple_simulate(choosers, alternatives, spec,
             if mult_by_alt_col:
                 expr = "({}) * {}".format(*expr)
             else:
-                expr = expr[0]
+                if isinstance(expr, tuple):
+                    expr = expr[0]
+                else:
+                    # it's already a string, which is fine
+                    pass
             try:
                 s = df.eval(expr)
             except Exception as e:

diff --git a/activitysim/defaults/datasources.py b/activitysim/defaults/datasources.py
@@ -17,7 +17,7 @@
 pd.options.mode.chained_assignment = None
 
 
-@sim.injectable('settings', cache=True)
+@sim.injectable(cache=True)
 def settings():
     with open(os.path.join(misc.configs_dir(), "settings.yaml")) as f:
         settings = yaml.load(f)
@@ -27,24 +27,24 @@ def settings():
         return settings
 
 
-@sim.injectable('run_number')
+@sim.injectable()
 def run_number():
     return misc.get_run_number()
 
 
-@sim.injectable('uuid', cache=True)
-def uuid_hex():
+@sim.injectable(cache=True)
+def uuid():
     return uuid.uuid4().hex
 
 
-@sim.injectable('store', cache=True)
-def hdfstore(settings):
+@sim.injectable(cache=True)
+def store(settings):
     return pd.HDFStore(
         os.path.join(misc.data_dir(), settings["store"]),
         mode='r')
 
 
-@sim.injectable("scenario")
+@sim.injectable()
 def scenario(settings):
     return settings["scenario"]
 

diff --git a/activitysim/defaults/variables.py b/activitysim/defaults/variables.py
@@ -20,6 +20,11 @@ def income_segment(households):
                   labels=[1, 2, 3, 4])
 
 
+@sim.column("households")
+def non_workers(households, persons):
+    return persons.household_id.value_counts() - households.workers
+
+
 @sim.column("households")
 def drivers(households, persons):
     # we assume that everyone 16 and older is a potential driver

diff --git a/example/README.md b/example/README.md
@@ -0,0 +1,77 @@
+This is a list of items to double check before using in practice:
+
+* Make sure the units in things like distance_to_work match the walk thresholds
+ in the mandatory tour frequency spec.  The original divided by 100.  This is
+  true also of round trip auto to work and round trip auto to school.
+
+* There might be a few variables left off of some of the models.  Look for
+`head` where the spec files are read, as it is used to drop some of the
+ rows.  Also look for `#`, which comments out variables in the spec.
+
+* Go back to the 3 school location choices, and run the models for the
+appropriate persons.
+
+* Probably needs code review of the variable definitions.  How much of the
+variable definitions are shared between regions and how much unique?  Age
+categories are shared?  Income categories are unique?
+
+* This will be pretty easy to catch, but we need to make sure the
+non_mandatory_tour model runs with Matt's upcoming changes to
+ `simple_simulate`.
+
+
+
+A few overarching principles
+
+* A little discussion of "NOT so object oriented" - this is more like a
+database - data is in standard tables, NOT in objects...  although the
+simulation framework is sort of like adding methods to objects
+
+* The implications of this are that most of the core code is pandas and thus
+the quality is controlled by the larger community.  We are thankful that its
+quality is very high.  Specifically, there's not so much code in activitysim
+"proper"
+
+* What it takes to add a new model
+    * define a new model
+    * define any new data sources necessary
+    * add any new assumptions in settings.yaml
+    * co-create the spec and any variables that are too complicated (or
+    reusable) for the spec
+    * run in notebook
+
+* Literally everything is really Python functions that compute something.  
+Case study of `num_under16_not_at_school` to show the inter-dependencies.
+
+
+
+
+A few questions about "best practices"
+
+* What to put into the default data sources and variable specs and what to
+put in the example / client-specific stuff?
+
+* Do we want to split injectables, variables, and tables into separate files,
+ or keep them all in one big file so it's easier to search?
+
+* How much variable computation to put in Excel versus Python?
+
+* There were some hard coded limits in the original csv - (area_type < 4 and
+distance_to_work < 3) - these are now just left in the csv spec.  Why would
+this be different than (income_in_thousands > 50)?  I've made an effort to
+not have such "magic numbers" in Python code.  EDIT: I've now added an 
+`isurban` variable which reads the area_type from the settings.yaml.  So my 
+convention so far is to leave hard-coded numbers out of the Python, 
+but putting them in the CSV is ok.  (Elizabeth: MAX_NUM_AUTOS exists now)
+
+* Want to name or number the person types in the spec files?
+
+* We're verging on the need to use YAML to configure the model runs - give 
+the non_mandatory_tour model as an example.  Is this too much code for a 
+modeler to manage or is this just right as it makes the model execution 
+transparent to the modeler?
+
+* Big issue: testing for client-specific code?  It's harder because outputs are "data
+dependent."  It's easier to take a small dataset and make sure it always runs.
+
+* Should I go back and put the Q&A I've had with Dave as issues on github to save for posterity?
diff --git a/example/configs/auto_ownership.csv b/example/configs/auto_ownership.csv
@@ -0,0 +1,30 @@
+Description,Expression,cars0,cars1,cars2,cars3,cars4
+2 Adults (age 16+),drivers==2,,0,3.0773,3.1962,2.6616
+3 Adults (age 16+),drivers==3,,0,3.5401,5.5131,5.208
+4+ Adults (age 16+),drivers>3,,2.0107,6.3662,8.5148,9.5807
+Persons age 16-17,num_adolescents,,0,-0.881,-1.7313,-1.7313
+Persons age 18-24,num_college_age,,-0.4087,-1.0095,-1.0107,-1.0107
+Persons age 25-34,num_young_adults,,0,-0.4849,-0.8596,-0.8596
+Presence of children age 0-4,num_young_children>0,,0.3669,0.7627,0.7627,0.7627
+Presence of children age 5-17,(num_children+num_adolescents)>0,,0.0158,0.2936,0.4769,0.4769
+"Number of workers, capped at 3",@df.workers.clip(upper=3),,0,0.2936,0.6389,0.8797
+"Piecewise Linear household income, $0-30k","@df.income_in_thousands.clip(0, 30)",,0.0383,0.054,0.0559,0.0619
+"Piecewise Linear household income, $30-75k","@(df.income_in_thousands-30).clip(0, 45)",,0,0.0083,0.011,0.0147
+"Piecewise Linear household income, $75k+, capped at $125k","@(df.income_in_thousands-75).clip(0, 50)",,0,0.0083,0.011,0.0147
+"Density index up to 10, if 0 workers","@(df.workers==0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654
+"Density index in excess of 10, if 0 workers",@(df.workers==0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766
+"Density index up to 10, if 1+ workers","@(df.workers>0)*df.density_index.clip(0, 10)",,0,-0.2028,-0.3654,-0.3654
+"Density index in excess of 10, if 1+ workers",@(df.workers>0)*(df.density_index-10).clip(0),,-0.0152,-0.1106,-0.1766,-0.1766
+Constants,@1,,1.1865,-1.0846,-3.2502,-5.313
+San Francisco county,county_name == 'San Francisco',,0.4259,0.4683,0.1458,0.1458
+Solano county,county_name == 'Solano',,-0.566,-0.4429,-0.2372,-0.2372
+Napa county,county_name == 'Napa',,-0.566,-0.4429,-0.2372,-0.2372
+Sonoma county,county_name == 'Sonoma',,-0.566,-0.4429,-0.2372,-0.2372
+Marin county,county_name == 'Marin',,-0.2434,0,0,0
+"Retail accessibility (0.66*PK + 0.34*OP) by auto, if 0 workers",(workers==0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.0626,0.0626,0.0626,0.0626
+"Retail accessibility (0.66*PK + 0.34*OP) by auto, if 1+ workers",(workers>0)*(0.66*AUTOPEAKRETAIL+0.34*AUTOOFFPEAKRETAIL),,0.1646,0.1646,0.1646,0.1646
+"Retail accessibility (0.66*PK + 0.34*OP) by transit, if 0 workers",(workers==0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.3053,-0.3053,-0.3053,-0.3053
+"Retail accessibility (0.66*PK + 0.34*OP) by transit, if 1+ workers",(workers>0)*(0.66*TRANSITPEAKRETAIL+0.34*TRANSITOFFPEAKRETAIL),,-0.5117,-0.5117,-0.5117,-0.5117
+"Retail accessibility by non-motorized, if 0 workers",(workers==0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03
+"Retail accessibility by non-motorized, if 1+ workers",(workers>0)*NONMOTORIZEDRETAIL,,-0.03,-0.03,-0.03,-0.03
+"Auto time savings per worker (over walk or transit, max 120) to work",workTourAutoTimeSavings/workers,,0.4707,0.6142,0.5705,0.7693
diff --git a/example/configs/auto_ownership_coeffs.csv b/example/configs/auto_ownership_coeffs.csv
diff --git a/example/configs/destination_choice.csv b/example/configs/destination_choice.csv
@@ -0,0 +1,10 @@
+Description,Expression,university,highschool,gradeschool,escortkids,escortnokids,shopping,eatout,othmaint,social,othdiscr,workbased
+Sample of alternatives correction factor,"min(dcSoaCorrections,60)",1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000
+"Distance, piecewise linear from 0 to 1 miles","if(distance<1.0,distance,1.0)",-3.2451,-0.9523,-1.6419,-0.1499,-0.1499,0.0000,-0.5609,0.0000,-0.7841,-0.1677,-0.7926
+"Distance, piecewise linear from 1 to 2 miles","if(distance<2.0,distance-1.0,1.0)",-2.7011,-0.5700,-0.5700,-0.1499,-0.1499,0.0000,-0.5609,0.0000,-0.7841,-0.1677,-0.7926
+"Distance, piecewise linear from 2 to 5 miles","if(distance<5.0,distance-2.0,3.0)",-0.5707,-0.5700,-0.5700,-0.8671,-0.8671,-0.5655,-0.3192,-0.6055,-0.3485,-0.4955,-0.5197
+"Distance, piecewise linear from 5 to 15 miles","if(distance<15.0,distance-5.0,10.0)",-0.5002,-0.1930,-0.2031,-0.2137,-0.2137,-0.1832,-0.1238,-0.1093,-0.1306,-0.1193,-0.2045
+"Distance, piecewise linear for 15+ miles",distance-15.0,-0.0730,-0.1882,-0.0460,-0.2137,-0.2137,-0.1832,-0.1238,-0.1093,-0.1306,-0.1193,-0.2045
+Mode choice logsum,mcLogsum,0.5358,0.5358,0.5358,0.6755,0.6755,0.6755,0.6755,0.6755,0.6755,0.6755,0.5136
+Size variable,sizeTerm,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000
+No attractions,"if(sizeTerm==0,1,0)",-999.0000,-999.0000,-999.0000,-999.0000,-999.0000,-999.0000,-999.0000,-999.0000,-999.0000,-999.0000,-999.0000
diff --git a/example/configs/destination_choice_alternatives_sample.csv b/example/configs/destination_choice_alternatives_sample.csv
@@ -0,0 +1,9 @@
+Description,Expression,worklow,workmed,workhigh,workveryhigh,university,highschool,gradeschool,escortkids,escortnokids,shopping,eatout,othmaint,social,othdiscr,workbased
+"Distance, piecewise linear from 0 to 1 miles",@df.distance.clip(upper=1),-0.8428,-0.8428,-0.8428,-0.8428,-3.2451,-0.9523,-1.6419,-0.1499,-0.1499,0,-0.5609,0,-0.7841,-0.1677,-0.7926
+"Distance, piecewise linear from 1 to 2 miles","@(df.distance-1).clip(0,1)",-0.3104,-0.3104,-0.3104,-0.3104,-2.7011,-0.57,-0.57,-0.1499,-0.1499,0,-0.5609,0,-0.7841,-0.1677,-0.7926
+"Distance, piecewise linear from 2 to 5 miles","@(df.distance-2).clip(0,3)",-0.3783,-0.3783,-0.3783,-0.3783,-0.5707,-0.57,-0.57,-0.8671,-0.8671,-0.5655,-0.3192,-0.6055,-0.3485,-0.4955,-0.5197
+"Distance, piecewise linear from 5 to 15 miles","@(df.distance-5).clip(0,10)",-0.1285,-0.1285,-0.1285,-0.1285,-0.5002,-0.193,-0.2031,-0.2137,-0.2137,-0.1832,-0.1238,-0.1093,-0.1306,-0.1193,-0.2045
+"Distance, piecewise linear for 15+ miles",@(df.distance-15.0).clip(0),-0.0917,-0.0917,-0.0917,-0.0917,-0.073,-0.1882,-0.046,-0.2137,-0.2137,-0.1832,-0.1238,-0.1093,-0.1306,-0.1193,-0.2045
+Size variable,@(df.income_segment==1)*df.size_low,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+No attractions,@(df.income_segment==1)*df.size_low == 0,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
+Destination taz is home taz,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/example/configs/destination_choice_size_terms.csv b/example/configs/destination_choice_size_terms.csv
@@ -0,0 +1,16 @@
+purpose,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE
+work,low,0,0.129,0.193,0.383,0.12,0.01,0.164,0,0,0,0
+work,med,0,0.12,0.197,0.325,0.139,0.008,0.21,0,0,0,0
+work,high,0,0.11,0.207,0.284,0.154,0.006,0.239,0,0,0,0
+work,veryhigh,0,0.093,0.27,0.241,0.146,0.004,0.246,0,0,0,0
+university,university,0,0,0,0,0,0,0,0,0,0.592,0.408
+school,grade,0,0,0,0,0,0,0,1,0,0,0
+school,high,0,0,0,0,0,0,0,0,1,0,0
+escort,kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0
+escort,no kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0
+shopping,shopping,0,1,0,0,0,0,0,0,0,0,0
+eatOut,eatOut,0,0.742,0,0.258,0,0,0,0,0,0,0
+othMaint,othMaint,0,0.482,0,0.518,0,0,0,0,0,0,0
+social,social,0,0.522,0,0.478,0,0,0,0,0,0,0
+othDiscr,othDiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0
+atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0