From 1b7646ea2fd3404864929bc67f614e51d24a9234 Mon Sep 17 00:00:00 2001
From: Rahul Iyer
Date: Mon, 7 Oct 2013 11:16:14 -0700
Subject: [PATCH] Upgrade: Add script to upgrade to v1.3 from v1.0, v1.1, v1.2

Pivotal Tracker: 57615140

Changes:
- Added all changelist files needed to upgrade to v1.3
- Updated the madpack script for the new upgrade paths
- Fixed issues around whitespace between arguments
- Release notes for v1.3
- Updated version numbers
- Updated PGXN versioning style to accommodate requirements
---
 ReleaseNotes.txt                              |  36 ++
 deploy/PGXN/CMakeLists.txt                    |   4 +-
 deploy/gppkg/CMakeLists.txt                   |   2 +-
 deploy/postflight.sh                          |   2 +-
 src/config/Version.yml                        |   2 +-
 src/madpack/changelist.yaml                   |  34 +-
 ...t_1.0_1.2.yaml => changelist_1.0_1.3.yaml} |  36 +-
 src/madpack/changelist_1.1_1.3.yaml           |  61 ++
 src/madpack/madpack.py                        |  78 ++-
 src/madpack/upgrade_util.py                   | 536 +++++++++---------
 .../postgres/modules/regress/linear.py_in     | 149 ++++-
 .../postgres/modules/regress/linear.sql_in    | 144 +++--
 .../modules/regress/test/linear.sql_in        |   9 +-
 .../modules/regress/test/logistic.sql_in      |   2 +-
 14 files changed, 710 insertions(+), 385 deletions(-)
 rename src/madpack/{changelist_1.0_1.2.yaml => changelist_1.0_1.3.yaml} (57%)
 create mode 100644 src/madpack/changelist_1.1_1.3.yaml

diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt
index 6c82152a1..ecaf4d57d 100644
--- a/ReleaseNotes.txt
+++ b/ReleaseNotes.txt
@@ -8,6 +8,42 @@ A complete list of changes for each release can be obtained by viewing the git
 commit history located at https://github.com/madlib/madlib/commits/master.
 Current list of bugs and issues can be found at http://jira.madlib.net.
 
+--------------------------------------------------------------------------------
+MADlib v1.3
+
+Release Date: 2013-October-03
+
+New Features:
+* Cox Proportional Hazards:
+    - Added stratification support for Cox PH models. Stratification is
+      shorthand for building a Cox model that allows more than one stratum
+      and, hence, more than one baseline hazard function. Stratification
+      provides two key pieces of functionality for the end user of Cox
+      models:
+      -- It allows a categorical variable Z to be appropriately accounted
+         for in the model without estimating its predictive impact on the
+         response variable.
+      -- It accommodates a categorical variable Z that is predictive of, or
+         associated with, the response variable but may not satisfy the
+         proportional hazards assumption.
+    - Added a new function (cox_zph) that tests the proportional hazards
+      assumption of a Cox model, so that users can build Cox models and
+      then verify the relevance of the model.
+* NULL Handling:
+    - Modified the behavior of linear and logistic regression to omit rows
+      containing NULL values for any of the dependent and independent
+      variables. The number of rows skipped is reported in the output table.
+
+Deprecated functions:
+    - The Cox Proportional Hazards function has been renamed to
+      'coxph_train'. The old function names ('cox_prop_hazards' and
+      'cox_prop_hazards_regr') have been deprecated and will be removed in
+      the next major version update.
+    - The aggregate form of linear regression ('linregr') has been deprecated.
+      The stored-procedure form ('linregr_train') should be used instead.
+
+Bug Fixes:
+    - Fixed a memory leak in the Apriori algorithm.
+ + -------------------------------------------------------------------------------- MADlib v1.2 diff --git a/deploy/PGXN/CMakeLists.txt b/deploy/PGXN/CMakeLists.txt index 7e57c1966..39c4c8233 100644 --- a/deploy/PGXN/CMakeLists.txt +++ b/deploy/PGXN/CMakeLists.txt @@ -3,8 +3,10 @@ # ------------------------------------------------------------------------------ set(MADLIB_PGXN_RELEASE_NUMBER 1) +# set(MADLIB_PGXN_VERSION_STR +# "${MADLIB_VERSION_MAJOR}.${MADLIB_VERSION_MINOR}.${MADLIB_VERSION_PATCH}release${MADLIB_PGXN_RELEASE_NUMBER} set(MADLIB_PGXN_VERSION_STR - "${MADLIB_VERSION_MAJOR}.${MADLIB_VERSION_MINOR}.${MADLIB_VERSION_PATCH}release${MADLIB_PGXN_RELEASE_NUMBER}") + "${MADLIB_VERSION_MAJOR}.${MADLIB_VERSION_MINOR}.${MADLIB_VERSION_PATCH}") set(MADLIB_PGXN_NAME "madlib-pgxn-${MADLIB_PGXN_VERSION_STR}") configure_file(META.json.in META.json) diff --git a/deploy/gppkg/CMakeLists.txt b/deploy/gppkg/CMakeLists.txt index b9a34307a..dfa4c704f 100644 --- a/deploy/gppkg/CMakeLists.txt +++ b/deploy/gppkg/CMakeLists.txt @@ -2,7 +2,7 @@ # Packaging for Greenplum's gppkg # ------------------------------------------------------------------------------ -set(MADLIB_GPPKG_VERSION "1.7") +set(MADLIB_GPPKG_VERSION "1.7.1") set(MADLIB_GPPKG_RELEASE_NUMBER 1) set(MADLIB_GPPKG_RPM_SOURCE_DIR "${CMAKE_BINARY_DIR}/_CPack_Packages/Linux/RPM/${CPACK_PACKAGE_FILE_NAME}" diff --git a/deploy/postflight.sh b/deploy/postflight.sh index cc106e4df..9e4ea3fb2 100755 --- a/deploy/postflight.sh +++ b/deploy/postflight.sh @@ -2,7 +2,7 @@ # $0 - Script Path, $1 - Package Path, $2 - Target Location, and $3 - Target Volumn -MADLIB_VERSION=1.2 +MADLIB_VERSION=1.3 find /usr/local/madlib/bin -type d -exec cp -RPf {} /usr/local/madlib/old_bin \; 2>/dev/null find /usr/local/madlib/bin -depth -type d -exec rm -r {} \; 2>/dev/null diff --git a/src/config/Version.yml b/src/config/Version.yml index 369bd3fdc..4aa48c34f 100644 --- a/src/config/Version.yml +++ b/src/config/Version.yml @@ -1 +1 @@ -version: 1.2 +version: 1.3 diff --git a/src/madpack/changelist.yaml b/src/madpack/changelist.yaml index 48db15d76..c5cf31071 100644 --- a/src/madpack/changelist.yaml +++ b/src/madpack/changelist.yaml @@ -1,4 +1,4 @@ -# Changelist for MADlib version 1.1 to 1.2 +# Changelist for MADlib version 1.2 to 1.3 # This file contains all changes that were introduced in a new version of # MADlib. This changelist is used by the upgrade script to detect what objects @@ -9,11 +9,11 @@ # file installed on the upgrade version. All other files (that don't have # updates), are cleaned up to remove object replacements new module: - arima: - arima_forecast: # Changes in the types (UDT) including removal and modification udt: + __logregr_result: + linregr_result: # List of the UDF changes that affect the user externally. This includes change # in function name, change in argument order or argument types, and removal of @@ -22,10 +22,36 @@ udt: # are user views dependent on this function, since the original function will # not be present in the upgraded version. 
udf: + # linear regression: 'num_processed' added in 'linregr_result' + - linregr_final: + rettype: schema_madlib.linregr_result + argument: schema_madlib.bytea8 + - linregr_merge_states: + rettype: schema_madlib.bytea8 + argument: schema_madlib.bytea8, schema_madlib.bytea8 + - linregr_transition: + rettype: schema_madlib.bytea8 + argument: schema_madlib.bytea8, double precision, double precision[] + + # logistic regression: 'num_processed' added in '__logregr_result' + - __logregr_cg_result: + rettype: schema_madlib.__logregr_result + argument: double precision[] + + - __logregr_irls_result: + rettype: schema_madlib.__logregr_result + argument: double precision[] + + - __logregr_igd_result: + rettype: schema_madlib.__logregr_result + argument: double precision[] # Changes to aggregates (UDA) including removal and modification # Overloaded functions should be mentioned separately uda: - + - linregr: + rettype: schema_madlib.linregr_result + argument: double precision, double precision[] + # Cast operators (UDC) updated/added in v1.1 udc: diff --git a/src/madpack/changelist_1.0_1.2.yaml b/src/madpack/changelist_1.0_1.3.yaml similarity index 57% rename from src/madpack/changelist_1.0_1.2.yaml rename to src/madpack/changelist_1.0_1.3.yaml index c53e7ba14..3b96f28d2 100644 --- a/src/madpack/changelist_1.0_1.2.yaml +++ b/src/madpack/changelist_1.0_1.3.yaml @@ -1,4 +1,4 @@ -# Changelist for MADlib version 1.0 to 1.2 +# Changelist for MADlib version 1.0 to 1.3 # This file contains all changes that were introduced in a new version of # MADlib. This changelist is used by the upgrade script to detect what objects @@ -17,7 +17,8 @@ new module: # Changes in the types (UDT) including removal and modification udt: - + __logregr_result: + linregr_result: # List of the UDF changes that affect the user externally. This includes change # in function name, change in argument order or argument types, and removal of # the function. In each case, the original function is as good as removed and a @@ -30,9 +31,36 @@ udf: rettype: schema_madlib.matrix_result argument: matrix_in text, matrix_out text + # linear regression: 'num_processed' added in 'linregr_result' + - linregr_final: + rettype: schema_madlib.linregr_result + argument: schema_madlib.bytea8 + - linregr_merge_states: + rettype: schema_madlib.bytea8 + argument: schema_madlib.bytea8, schema_madlib.bytea8 + - linregr_transition: + rettype: schema_madlib.bytea8 + argument: schema_madlib.bytea8, double precision, double precision[] + + # logistic regression: 'num_processed' added in '__logregr_result' + - __logregr_cg_result: + rettype: schema_madlib.__logregr_result + argument: double precision[] + + - __logregr_irls_result: + rettype: schema_madlib.__logregr_result + argument: double precision[] + + - __logregr_igd_result: + rettype: schema_madlib.__logregr_result + argument: double precision[] + # Changes to aggregates (UDA) including removal and modification # Overloaded functions should be mentioned separately uda: - + - linregr: + rettype: schema_madlib.linregr_result + argument: double precision, double precision[] + # Cast operators (UDC) updated/added in v1.1/v1.2 -udc: +udc: \ No newline at end of file diff --git a/src/madpack/changelist_1.1_1.3.yaml b/src/madpack/changelist_1.1_1.3.yaml new file mode 100644 index 000000000..df432eaf8 --- /dev/null +++ b/src/madpack/changelist_1.1_1.3.yaml @@ -0,0 +1,61 @@ + +# Changelist for MADlib version 1.1 to 1.3 + +# This file contains all changes that were introduced in a new version of +# MADlib. 
This changelist is used by the upgrade script to detect what objects +# should be upgraded (while retaining all other objects from the previous version) + +# New modules (actually .sql_in files) added in upgrade version +# For these files the sql_in code is retained as is with the functions in the +# file installed on the upgrade version. All other files (that don't have +# updates), are cleaned up to remove object replacements +new module: + arima: + arima_forecast: + + +# Changes in the types (UDT) including removal and modification +udt: + __logregr_result: + linregr_result: + +# List of the UDF changes that affect the user externally. This includes change +# in function name, change in argument order or argument types, and removal of +# the function. In each case, the original function is as good as removed and a +# new function is created. In such cases, we should abort the upgrade if there +# are user views dependent on this function, since the original function will +# not be present in the upgraded version. +udf: + # linear regression: 'num_processed' added in 'linregr_result' + - linregr_final: + rettype: schema_madlib.linregr_result + argument: schema_madlib.bytea8 + - linregr_merge_states: + rettype: schema_madlib.bytea8 + argument: schema_madlib.bytea8, schema_madlib.bytea8 + - linregr_transition: + rettype: schema_madlib.bytea8 + argument: schema_madlib.bytea8, double precision, double precision[] + + # logistic regression: 'num_processed' added in '__logregr_result' + - __logregr_cg_result: + rettype: schema_madlib.__logregr_result + argument: double precision[] + + - __logregr_irls_result: + rettype: schema_madlib.__logregr_result + argument: double precision[] + + - __logregr_igd_result: + rettype: schema_madlib.__logregr_result + argument: double precision[] + +# Changes to aggregates (UDA) including removal and modification +# Overloaded functions should be mentioned separately +uda: + - linregr: + rettype: schema_madlib.linregr_result + argument: double precision, double precision[] + +# Cast operators (UDC) updated/added in v1.1 +udc: diff --git a/src/madpack/madpack.py b/src/madpack/madpack.py index f87841fa0..7f9c6f9bf 100755 --- a/src/madpack/madpack.py +++ b/src/madpack/madpack.py @@ -94,7 +94,7 @@ def __error(msg, stop): # @param msg info message # @param verbose prints only if True ## # # # # # # # # # # # # # # # # # # # # # # # # # # # # -def __info(msg, verbose): +def __info(msg, verbose=True): # Print to stdout if verbose: print this + ' : INFO : ' + msg @@ -227,8 +227,8 @@ def __run_sql_file(schema, maddir_mod_py, module, sqlfile, __info(sub_module, False) # Special treatment for new module and 'svec' module - if (sub_module not in sc.get_change_handler().get_newmodule()) and \ - not (sub_module == 'svec' and 'svec' in sc.get_change_handler().get_udt()): + if (sub_module not in sc.get_change_handler().newmodule) and \ + not (sub_module == 'svec' and 'svec' in sc.get_change_handler().udt): sql = open(tmpfile).read() sql = sc.cleanup(sql) open(tmpfile, 'w').write(sql) @@ -373,7 +373,8 @@ def __plpy_check(py_min_ver): __info("Testing PL/Python environment...", True) # Check PL/Python existence - rv = __run_sql_query("SELECT count(*) AS CNT FROM pg_language WHERE lanname = 'plpythonu'", True) + rv = __run_sql_query("SELECT count(*) AS CNT FROM pg_language " + "WHERE lanname = 'plpythonu'", True) if int(rv[0]['cnt']) > 0: __info("> PL/Python already installed", verbose) else: @@ -524,51 +525,80 @@ def __db_upgrade(schema, dbrev): abort = False if 
td.has_dependency(): + __info("*"*50, True) __info("\tFollowing user tables are dependent on updated MADlib types:", True) __info(td.get_dependency_str(), True) + __info("*"*50, True) cd_udt = [udt for udt in td.get_depended_udt() - if udt in ch.get_udt()] + if udt in ch.udt] if len(cd_udt) > 0: __error(""" - User has objects dependent on updated MADlib types ({0})! - These objects need to be dropped before starting upgrade again. Aborting upgrade ... - """.format('\n'.join(cd_udt)), False) + User has objects dependent on following updated MADlib types! + {0} + These objects need to be dropped before upgrading. + """.format('\n\t\t\t'.join(cd_udt)), False) + + # TODO: Remove this after v1.3 + # we add special handling for 'linregr_result' + if 'linregr_result' in cd_udt: + __info("""Dependency on 'linregr_result' could be due to objects + created from the output of the aggregate 'linregr'. + Please refer to the Linear Regression documentation + + for the recommended solution. + """, False) abort = True if vd.has_dependency(): + __info("*"*50, True) __info("\tFollowing user views are dependent on updated MADlib objects:", True) __info(vd.get_dependency_graph_str(), True) + __info("*"*50, True) c_udf = ch.get_udf_signature() d_udf = vd.get_depended_func_signature(False) cd_udf = [udf for udf in d_udf if udf in c_udf] if len(cd_udf) > 0: __error(""" - User has objects dependent on updated MADlib functions ({0})! - These objects will not fail to work with the new functions and - need to be dropped before starting upgrade again. Aborting upgrade ... - """.format('\n'.join(cd_udf)), False) + User has objects dependent on following updated MADlib functions! + {0} + These objects will fail to work with the updated functions and + need to be dropped before starting upgrade again. + """.format('\n\t\t\t\t\t'.join(cd_udf)), False) abort = True + c_uda = ch.get_uda_signature() d_uda = vd.get_depended_func_signature(True) cd_uda = [uda for uda in d_uda if uda in c_uda] if len(cd_uda) > 0: __error(""" - User has objects dependent on updated MADlib functions ({0})! - These objects will not fail to work with the new aggregates and - need to be dropped before starting upgrade again. Aborting upgrade ... - """.format('\n'.join(cd_uda)), False) + User has objects dependent on following updated MADlib aggregates! + {0} + These objects will fail to work with the new aggregates and + need to be dropped before starting upgrade again. + """.format('\n\t\t\t\t\t'.join(cd_uda)), False) abort = True if abort: __error('------- Upgrade aborted. 
-------', True) else: - __info("No explicit dependency problem found, continuing to upgrade ...", True) + __info("No dependency problem found, continuing to upgrade ...", True) if vd.has_dependency(): vd.save_and_drop() __info("\tReading existing UDAs/UDTs...", False) - sc = ScriptCleaner(schema, portid, con_args, ch) + try: + sc = ScriptCleaner(schema, portid, con_args, ch) + except Exception as e: + __info(str(e), True) + raise e + __info("Script Cleaner initialized ...", False) + + + # __info("\tChanged functions: " + str(ch.udf), True) + # __info("\tChanged aggregates: " + str(ch.uda), True) + # __info("\tChanged types: " + str(ch.udt), True) + # __info("\tChanged casts: " + str(ch.udc), True) ch.drop_changed_uda() ch.drop_changed_udt() @@ -1077,8 +1107,8 @@ def main(argv): # FIXME: Change this to get the previous version from a config file if float(dbrev) < 1.0: - __info("""The version gap is too large, only release-by-release - incremental upgrade is supported.""", True) + __info("""The version gap is too large, upgrade is supported only for + packages greater than or equal to v1.0.""", True) return # 3) Run upgrade @@ -1086,11 +1116,11 @@ def main(argv): __plpy_check(py_min_ver) __db_upgrade(schema, dbrev) except Exception as e: - __error("MADlib upgrade failed.", True) #Uncomment the following lines when debugging - #print "Exception: " + str(e) - #print sys.exc_info() - #traceback.print_tb(sys.exc_info()[2]) + print "Exception: " + str(e) + print sys.exc_info() + traceback.print_tb(sys.exc_info()[2]) + __error("MADlib upgrade failed.", True) ### # COMMAND: install-check diff --git a/src/madpack/upgrade_util.py b/src/madpack/upgrade_util.py index 6d67fd449..f5a3755a6 100644 --- a/src/madpack/upgrade_util.py +++ b/src/madpack/upgrade_util.py @@ -1,30 +1,31 @@ import re -import sys import yaml from collections import defaultdict import os -""" -@brief Wrapper function for ____run_sql_query -""" + def run_sql(sql, portid, con_args): + """ + @brief Wrapper function for ____run_sql_query + """ from madpack import ____run_sql_query return ____run_sql_query(sql, True, portid, con_args) -""" -@brief Get the signature of a UDF/UDA for comparison -""" + def get_signature_for_compare(schema, proname, rettype, argument): - signature = '%s %s.%s(%s)' % ( - rettype.strip(), schema.strip(), proname.strip(), argument.strip()) - signature = re.sub('\s+', ' ', signature) + """ + @brief Get the signature of a UDF/UDA for comparison + """ + signature = '{0} {1}.{2}({3})'.format(rettype.strip(), schema.strip(), + proname.strip(), argument.strip()) signature = re.sub('"', '', signature) return signature.lower() -""" -@brief Base class for handling the upgrade -""" + class UpgradeBase: + """ + @brief Base class for handling the upgrade + """ def __init__(self, schema, portid, con_args): self._schema = schema.lower() self._portid = portid @@ -46,21 +47,20 @@ def _get_schema_oid(self): SELECT oid FROM pg_namespace WHERE nspname = '{schema}' """.format(schema=self._schema))[0]['oid'] - - """ - @brief Get the function name, return type, and arguments given an oid - @note The function can only handle the case that proallargtypes is null, - refer to pg_catalog.pg_get_function_identity_argument and - pg_catalog.pg_get_function_result in PG for a complete implementation, which are - not supported by GP - """ def _get_function_info(self, oid): + """ + @brief Get the function name, return type, and arguments given an oid + @note The function can only handle the case that proallargtypes is null, + refer to 
pg_catalog.pg_get_function_identity_argument and + pg_catalog.pg_get_function_result in PG for a complete implementation, which are + not supported by GP + """ row = self._run_sql(""" SELECT max(proname) AS proname, max(rettype) AS rettype, array_to_string( - array_agg(argname || ' ' || argtype order by i), ', ') AS argument + array_agg(argname || ' ' || argtype order by i), ',') AS argument FROM ( SELECT @@ -81,14 +81,16 @@ def _get_function_info(self, oid): oid = {oid} ) AS f """.format(oid=oid)) - return {"proname": row[0]['proname'], 'rettype': row[0]['rettype'], - 'argument': row[0]['argument']} + return {"proname": row[0]['proname'], + "rettype": row[0]['rettype'], + "argument": row[0]['argument']} + -""" -@brief This class reads changes from the configuration file and handles -the dropping of objects -""" class ChangeHandler(UpgradeBase): + """ + @brief This class reads changes from the configuration file and handles + the dropping of objects + """ def __init__(self, schema, portid, con_args, maddir, mad_dbrev): UpgradeBase.__init__(self, schema, portid, con_args) self._opr_ind_svec = None @@ -102,11 +104,10 @@ def __init__(self, schema, portid, con_args, maddir, mad_dbrev): self._udc = None self._load() - - """ - @brief Get the UDOps which are independent of svec in the current version - """ def _get_opr_indepent_svec(self): + """ + @brief Get the User Defined Operators independent of svec in the current version + """ rows = self._run_sql(""" SELECT oprname, @@ -141,16 +142,19 @@ def _load_config_param(self, config_iterable): make all function names lower case to ensure ease of comparison. Args: - @param config_dict is a dictionary with key as object name - (eg. function name) and value as the details for - the object. The details for the object are assumed to + @param config_iterable is an iterable of dictionaries, each with + key = object name (eg. function name) and value = details + for the object. The details for the object are assumed to be in a dictionary with following keys: rettype: Return type argument: List of arguments Returns: A dictionary that lists all specific objects (functions, aggregates, etc) - with object name as key and another dictionary with objects details + with object name as key and a list as value, where the list + contains all the items present in + + another dictionary with objects details as the value. 
""" _return_obj = defaultdict(list) @@ -162,70 +166,61 @@ def _load_config_param(self, config_iterable): if obj_details['argument'] is not None: argument = obj_details['argument'].lower().replace( 'schema_madlib', self._schema) + all_arguments = [each_arg.strip() + for each_arg in argument.split(',')] _return_obj[obj_name].append( - {'rettype': rettype, 'argument': argument}) + {'rettype': rettype, + 'argument': ','.join(all_arguments)}) return _return_obj - """ - @brief Load the configuration file - """ def _load(self): + """ + @brief Load the configuration file + """ # _mad_dbrev = 1.0 if float(self._mad_dbrev) < 1.1: - filename = os.path.join(self._maddir, 'madpack' , 'changelist_1.0_1.2.yaml') + filename = os.path.join(self._maddir, 'madpack', + 'changelist_1.0_1.3.yaml') # _mad_dbrev = 1.1 + elif float(self._mad_dbrev) < 1.2: + filename = os.path.join(self._maddir, 'madpack', + 'changelist_1.1_1.3.yaml') else: - filename = os.path.join(self._maddir, 'madpack' , 'changelist.yaml') - - config = yaml.load(open(filename)) - - if config['new module'] is not None: - self._newmodule = config['new module'] - else: - self._newmodule = {} + filename = os.path.join(self._maddir, 'madpack', + 'changelist.yaml') - if config['udt'] is not None: - self._udt = config['udt'] - else: - self._udt = {} - - if config['udc'] is not None: - self._udc = config['udc'] - else: - self._udc = {} + config = yaml.load(open(filename)) + self._newmodule = config['new module'] if config['new module'] else {} + self._udt = config['udt'] if config['udt'] else {} + self._udc = config['udc'] if config['udc'] else {} self._udf = self._load_config_param(config['udf']) self._uda = self._load_config_param(config['uda']) - """ - @brief Get the list of new modules - """ - def get_newmodule(self): + @property + def newmodule(self): return self._newmodule - """ - @brief Get the list of changed UDTs - """ - def get_udt(self): + @property + def udt(self): return self._udt - """ - @brief Get the list of changed UDAs - """ - def get_uda(self): + @property + def uda(self): return self._uda - """ - @brief Get the list of changed UDCs - @note This is a UDC in utilities module - """ - def get_udc(self): + @property + def udf(self): + return self._udf + + @property + def udc(self): return self._udc - """ - @brief Get the list of UDF signatures for comparison - """ def get_udf_signature(self): + """ + @brief Get the list of UDF signatures for comparison + """ res = defaultdict(bool) for udf in self._udf: for item in self._udf[udf]: @@ -234,10 +229,10 @@ def get_udf_signature(self): res[signature] = True return res - """ - @brief Get the list of UDA signatures for comparison - """ def get_uda_signature(self): + """ + @brief Get the list of UDA signatures for comparison + """ res = defaultdict(bool) for uda in self._uda: for item in self._uda[uda]: @@ -246,18 +241,17 @@ def get_uda_signature(self): res[signature] = True return res - """ - @brief Drop all types that were updated/removed in the new version - @note It is dangerous to drop a UDT becuase there might be many - dependencies - """ def drop_changed_udt(self): + """ + @brief Drop all types that were updated/removed in the new version + @note It is dangerous to drop a UDT becuase there might be many + dependencies + """ # Note that we use CASCADE option here. This might be dangerous because # it may drop some undetected dependent objects (eg. 
UDCast, UDOp, etc) for udt in self._udt: - self._run_sql(""" - DROP TYPE IF EXISTS {schema}.{udt} CASCADE - """.format(schema=self._schema, udt=udt)) + self._run_sql("DROP TYPE IF EXISTS {0}.{1} CASCADE". + format(self._schema, udt)) if udt == 'svec': # Drop operators defined in the svec module which do not # depend on svec. We will run the whole svec.sql without @@ -273,47 +267,45 @@ def drop_changed_udt(self): nsp_right=self._opr_ind_svec[opr]['nsp_right'], typ_right=self._opr_ind_svec[opr]['typ_right'] )) - """ - @brief Drop all functions (UDF) that were removed in new version - """ + def drop_changed_udf(self): + """ + @brief Drop all functions (UDF) that were removed in new version + """ for udf in self._udf: for item in self._udf[udf]: - self._run_sql(""" - DROP FUNCTION IF EXISTS {schema}.{udf}({arg}) - """.format(schema=self._schema, - udf=udf, - arg=item['argument'])) + self._run_sql("DROP FUNCTION IF EXISTS {schema}.{udf}({arg})". + format(schema=self._schema, + udf=udf, + arg=item['argument'])) - """ - @brief Drop all aggregates (UDA) that were removed in new version - """ def drop_changed_uda(self): + """ + @brief Drop all aggregates (UDA) that were removed in new version + """ for uda in self._uda: for item in self._uda[uda]: - self._run_sql(""" - DROP AGGREGATE IF EXISTS {schema}.{uda}({arg}) - """.format(schema=self._schema, - uda=uda, - arg=item['argument'])) + self._run_sql("DROP AGGREGATE IF EXISTS {schema}.{uda}({arg})". + format(schema=self._schema, + uda=uda, + arg=item['argument'])) - """ - @brief Drop all casts (UDC) that were updated/removed in new version - @note We have special treatment for UDCs defined in the svec module - """ def drop_changed_udc(self): + """ + @brief Drop all casts (UDC) that were updated/removed in new version + @note We have special treatment for UDCs defined in the svec module + """ for udc in self._udc: - self._run_sql(""" - DROP CAST IF EXISTS ({sourcetype} AS {targettype}) - """.format( - sourcetype=self._udc[udc]['sourcetype'], - targettype=self._udc[udc]['targettype'])) + self._run_sql("DROP CAST IF EXISTS ({sourcetype} AS {targettype})". 
+ format(sourcetype=self._udc[udc]['sourcetype'], + targettype=self._udc[udc]['targettype'])) + -""" -@brief This class detects the direct/recursive view dependencies on MADLib -UDFs/UDAs defined in the current version -""" class ViewDependency(UpgradeBase): + """ + @brief This class detects the direct/recursive view dependencies on MADLib + UDFs/UDAs defined in the current version + """ def __init__(self, schema, portid, con_args): UpgradeBase.__init__(self, schema, portid, con_args) self._view2proc = None @@ -358,7 +350,7 @@ def _detect_direct_view_dependency(self): self._view2proc = defaultdict(list) for row in rows: - key= (row['schema'], row['view']) + key = (row['schema'], row['view']) self._view2proc[key].append( (row['procname'], row['procoid'], True if row['proisagg'] == 't' else False)) @@ -443,7 +435,7 @@ def _filter_recursive_view_dependency(self): """ @brief Build the dependency graph (depender-to-dependee adjacency list) """ - def _build_dependency_graph(self, hasProcDependency = False): + def _build_dependency_graph(self, hasProcDependency=False): der2dee = self._view2view.copy() for view in self._view2proc: if view not in self._view2view: @@ -480,7 +472,7 @@ def get_create_order_views(self): del graph[view] for depender in graph: graph[depender] = [r for r in graph[depender] - if r not in remove_list] + if r not in remove_list] if len(remove_list) == 0: break return ordered_views @@ -496,53 +488,43 @@ def get_drop_order_views(self): """ @brief Get the depended UDF/UDA signatures for comparison """ - def get_depended_func_signature(self, isagg = True): + def get_depended_func_signature(self, aggregate=True): res = {} for procs in self._view2proc.values(): for proc in procs: - if proc[2] != isagg: - continue - if (self._schema, proc) not in res: + if proc[2] is aggregate and (self._schema, proc) not in res: funcinfo = self._get_function_info(proc[1]) - signature = get_signature_for_compare( - self._schema, proc[0], funcinfo['rettype'], funcinfo['argument']) + signature = get_signature_for_compare(self._schema, proc[0], + funcinfo['rettype'], + funcinfo['argument']) res[signature] = True return res - """ - @brief Get dependent UDAs - """ - def get_depended_uda(self): + def get_proc_w_dependency(self, aggregate=True): res = [] for procs in self._view2proc.values(): for proc in procs: - if proc[2] == False: - # proc is not an aggregate -> skip - continue - if (self._schema, proc) not in res: + if proc[2] is aggregate and (self._schema, proc) not in res: res.append((self._schema, proc)) res.sort() return res - """ - @brief Get dependent UDFs - """ + def get_depended_uda(self): + """ + @brief Get dependent UDAs + """ + self.get_proc_w_dependency(aggregate=True) + def get_depended_udf(self): - res = [] - for procs in self._view2proc.values(): - for proc in procs: - if proc[2] == True: - # proc is an aggregate -> skip - continue - if (self._schema, proc) not in res: - res.append((self._schema, proc)) - res.sort() - return res + """ + @brief Get dependent UDFs + """ + self.get_proc_w_dependency(aggregate=False) - """ - @brief Save and drop the dependent views - """ def save_and_drop(self): + """ + @brief Save and drop the dependent views + """ self._view2def = {} ordered_views = self.get_drop_order_views() # Save views @@ -564,10 +546,10 @@ def save_and_drop(self): DROP VIEW IF EXISTS {schema}.{view} """.format(schema=view[0], view=view[1])) - """ - @brief Restore the dependent views - """ def restore(self): + """ + @brief Restore the dependent views + """ ordered_views = 
self.get_create_order_views() for view in ordered_views: row = self._view2def[view] @@ -584,10 +566,10 @@ def restore(self): RESET ROLE """.format( schema=schema, view=view, - definition=definition, owner=owner)) + definition=definition, + owner=owner)) def _node_to_str(self, node): - res = '' if len(node) == 2: res = '%s.%s' % (node[0], node[1]) else: @@ -596,38 +578,36 @@ def _node_to_str(self, node): return res def _nodes_to_str(self, nodes): - res = [] - for node in nodes: - res.append(self._node_to_str(node)) - return res + return [self._node_to_str(i) for i in nodes] - """ - @brief Get the dependency graph string for print - """ def get_dependency_graph_str(self): + """ + @brief Get the dependency graph string for print + """ graph = self._build_dependency_graph(True) - nodes = graph.keys() + nodes = list(graph.keys()) nodes.sort() - res = '\t\tDependency Graph (Depender-Dependee Adjacency List):\n' + res = ["\tDependency Graph (Depender-Dependee Adjacency List):"] for node in nodes: - res += "\t\t%s -> %s\n" % ( - self._node_to_str(node), self._nodes_to_str(graph[node])) - return res[:-1] - -""" -@brief This class detects the table dependencies on MADLib UDTs defined in the -current version -""" + res.append("{0} -> {1}".format(self._node_to_str(node), + self._nodes_to_str(graph[node]))) + return "\n\t\t\t\t".join(res) + + class TableDependency(UpgradeBase): + """ + @brief This class detects the table dependencies on MADLib UDTs defined in the + current version + """ def __init__(self, schema, portid, con_args): UpgradeBase.__init__(self, schema, portid, con_args) self._table2type = None self._detect_table_dependency() - """ - @brief Detect the table dependencies on MADLib UDTs - """ def _detect_table_dependency(self): + """ + @brief Detect the table dependencies on MADLib UDTs + """ rows = self._run_sql(""" SELECT nsp.nspname AS schema, @@ -651,20 +631,20 @@ def _detect_table_dependency(self): self._table2type = defaultdict(list) for row in rows: - key= (row['schema'], row['relation']) + key = (row['schema'], row['relation']) self._table2type[key].append( (row['column'], row['type'])) - """ - @brief Check dependencies - """ def has_dependency(self): + """ + @brief Check dependencies + """ return len(self._table2type) > 0 - """ - @brief Get the list of depended UDTs - """ def get_depended_udt(self): + """ + @brief Get the list of depended UDTs + """ res = defaultdict(bool) for table in self._table2type: for (col, typ) in self._table2type[table]: @@ -672,34 +652,38 @@ def get_depended_udt(self): res[typ] = True return res - """ - @brief Get the dependencies in string for print - """ def get_dependency_str(self): - res = '\t\tTable Dependency (schema.table.column -> type):\n' + """ + @brief Get the dependencies in string for print + """ + res = ['\tTable Dependency (schema.table.column -> MADlib type):'] for table in self._table2type: for (col, udt) in self._table2type[table]: - res += "\t\t%s.%s.%s -> %s\n" % (table[0], table[1], col, udt) - return res[:-1] + res.append("{0}.{1}.{2} -> {3}".format(table[0], table[1], col, + udt)) + return "\n\t\t\t\t".join(res) + -""" -@brief This class removes sql statements from a sql script which should not be -executed during the upgrade -""" class ScriptCleaner(UpgradeBase): + """ + @brief This class removes sql statements from a sql script which should not be + executed during the upgrade + """ def __init__(self, schema, portid, con_args, change_handler): UpgradeBase.__init__(self, schema, portid, con_args) + self._ch = change_handler self._sql 
= None self._existing_uda = None self._existing_udt = None - self._get_existing_uda() + self._aggregate_patterns = self._get_all_aggregate_patterns() + # print("Number of existing UDAs = " + str(len(self._existing_uda))) + # print("Number of UDAs to not create = " + str(len(self._aggregate_patterns))) self._get_existing_udt() - self._ch = change_handler - """ - @breif Get the existing UDAs in the current version - """ def _get_existing_uda(self): + """ + @brief Get the existing UDAs in the current version + """ rows = self._run_sql(""" SELECT max(proname) AS proname, @@ -730,19 +714,47 @@ def _get_existing_uda(self): GROUP BY procoid """.format(schema=self._schema)) - self._existing_uda = {} + self._existing_uda = defaultdict(list) for row in rows: # Consider about the overloaded aggregates - if row['proname'] not in self._existing_uda: - self._existing_uda[row['proname']] = [] - self._existing_uda[row['proname']].append({ - 'rettype': ['rettype'], - 'argument': row['argument']}) + self._existing_uda[row['proname']].append( + {'rettype': row['rettype'], + 'argument': row['argument']}) + + def _get_all_aggregate_patterns(self): + """ + Creates a list of string patterns that represent all possible + 'CREATE AGGREGATE' statements except ones that are being + replaced/introduced as part of this upgrade. + + """ + self._get_existing_uda() + aggregate_patterns = [] + for each_uda, uda_details in self._existing_uda.iteritems(): + for each_item in uda_details: + if each_uda in self._ch.uda: + if each_item in self._ch.uda[each_uda]: + continue + p_arg_str = '' + argument = each_item['argument'] + args = argument.split(',') + for arg in args: + arg = self._rewrite_type_in(arg.strip()) + if p_arg_str == '': + p_arg_str += '%s\s*' % arg + else: + p_arg_str += ',\s*%s\s*' % arg + p_str = "CREATE\s+(ORDERED\s)*\s*AGGREGATE" \ + "\s+%s\.(%s)\s*\(\s*%s\)(.*?);" % (self._schema.upper(), + each_uda, + p_arg_str) + aggregate_patterns.append(p_str) + return aggregate_patterns - """ - @brief Get the existing UDTs in the current version - """ def _get_existing_udt(self): + """ + @brief Get the existing UDTs in the current version + """ rows = self._run_sql(""" SELECT typname @@ -753,21 +765,19 @@ def _get_existing_udt(self): t.typnamespace = nsp.oid AND nsp.nspname = '{schema}' """.format(schema=self._schema)) - self._existing_udt = [] - for row in rows: - self._existing_udt.append(row['typname']) + self._existing_udt = [row['typname'] for row in rows] - """ - @note The changer_handler is needed for deciding which sql statements to - remove - """ def get_change_handler(self): + """ + @note The changer_handler is needed for deciding which sql statements to + remove + """ return self._ch - """ - @brief Remove comments in the sql script - """ def _clean_comment(self): + """ + @brief Remove comments in the sql script + """ pattern = re.compile(r"""(/\*(.|[\r\n])*?\*/)|(--(.*|[\r\n]))""") res = '' lines = re.split(r'[\r\n]+', self._sql) @@ -784,20 +794,20 @@ def _clean_comment(self): """ def _clean_type(self): # remove 'drop type' - pattern = re.compile('DROP(\s+)TYPE(.*?);', re.DOTALL | re.IGNORECASE); + pattern = re.compile('DROP(\s+)TYPE(.*?);', re.DOTALL | re.IGNORECASE) self._sql = re.sub(pattern, '', self._sql) # remove 'create type' udt_str = '' for udt in self._existing_udt: - if udt in self._ch.get_udt(): + if udt in self._ch.udt: continue if udt_str == '': udt_str += udt else: udt_str += '|' + udt p_str = 'CREATE(\s+)TYPE(\s+)%s\.(%s)(.*?);' % (self._schema.upper(), udt_str) - pattern = re.compile(p_str, 
re.DOTALL | re.IGNORECASE); + pattern = re.compile(p_str, re.DOTALL | re.IGNORECASE) self._sql = re.sub(pattern, '', self._sql) """ @@ -805,22 +815,25 @@ def _clean_type(self): """ def _clean_cast(self): # remove 'drop cast' - pattern = re.compile('DROP(\s+)CAST(.*?);', re.DOTALL | re.IGNORECASE); + pattern = re.compile('DROP(\s+)CAST(.*?);', re.DOTALL | re.IGNORECASE) self._sql = re.sub(pattern, '', self._sql) # remove 'create cast' udc_str = '' - for udc in self._ch.get_udc(): + for udc in self._ch.udc: if udc_str == '': udc_str += '%s\s+AS\s+%s' % ( - self._ch.get_udc()[udc]['sourcetype'], self._ch.get_udc()[udc]['targettype']) + self._ch.udc[udc]['sourcetype'], + self._ch.udc[udc]['targettype']) else: udc_str += '|' + '%s\s+AS\s+%s' % ( - self._ch.get_udc()[udc]['sourcetype'], self._ch.get_udc()[udc]['targettype']) + self._ch.udc[udc]['sourcetype'], + self._ch.udc[udc]['targettype']) pattern = re.compile('CREATE\s+CAST(.*?);', re.DOTALL | re.IGNORECASE) if udc_str != '': - pattern = re.compile('CREATE\s+CAST\s*\(\s*(?!%s)(.*?);' % udc_str , re.DOTALL | re.IGNORECASE) + pattern = re.compile('CREATE\s+CAST\s*\(\s*(?!%s)(.*?);' % + udc_str, re.DOTALL | re.IGNORECASE) self._sql = re.sub(pattern, '', self._sql) """ @@ -828,11 +841,11 @@ def _clean_cast(self): """ def _clean_operator(self): # remove 'drop operator' - pattern = re.compile('DROP(\s+)OPERATOR(.*?);', re.DOTALL | re.IGNORECASE); + pattern = re.compile('DROP(\s+)OPERATOR(.*?);', re.DOTALL | re.IGNORECASE) self._sql = re.sub(pattern, '', self._sql) # remove 'create operator' - pattern = re.compile(r"""CREATE(\s+)OPERATOR(.*?);""", re.DOTALL | re.IGNORECASE); + pattern = re.compile(r"""CREATE(\s+)OPERATOR(.*?);""", re.DOTALL | re.IGNORECASE) self._sql = re.sub(pattern, '', self._sql) """ @@ -840,64 +853,45 @@ def _clean_operator(self): """ def _rewrite_type_in(self, arg): type_mapper = { - 'smallint':'(int2|smallint)', - 'integer':'(int|int4|integer)', - 'bigint':'(int8|bigint)', - 'double precision':'(float8|double precision)', - 'real':'(float4|real)', - 'character varying':'(varchar|character varying)' + 'smallint': '(int2|smallint)', + 'integer': '(int|int4|integer)', + 'bigint': '(int8|bigint)', + 'double precision': '(float8|double precision)', + 'real': '(float4|real)', + 'character varying': '(varchar|character varying)' } for typ in type_mapper: arg = arg.replace(typ, type_mapper[typ]) return arg.replace('[', '\[').replace(']', '\]') - """ - @brief Remove "drop/create aggregate" statements in the sql script - """ def _clean_aggregate(self): - # remove 'drop aggregate' - pattern = re.compile('DROP(\s+)AGGREGATE(.*?);', re.DOTALL | re.IGNORECASE); - self._sql = re.sub(pattern, '', self._sql) + # remove all drop aggregate statements + self._sql = re.sub(re.compile('DROP(\s+)AGGREGATE(.*?);', + re.DOTALL | re.IGNORECASE), + '', self._sql) + # remove all create aggregate statements except ones that should + # be created as part of upgrade + for each_pattern in self._aggregate_patterns: + regex_pat = re.compile(each_pattern, re.DOTALL | re.IGNORECASE) + self._sql = re.sub(regex_pat, '', self._sql) - # remove 'create aggregate' - uda_str = '' - for uda in self._existing_uda: - for item in self._existing_uda[uda]: - if uda in self._ch.get_uda(): - items = self._ch.get_uda()[uda] - if item in items: - continue - p_arg_str = '' - argument = item['argument'] - args = argument.split(',') - for arg in args: - arg = self._rewrite_type_in(arg.strip()) - if p_arg_str == '': - p_arg_str += '%s\s*' % arg - else: - p_arg_str += ',\s*%s\s*' % 
arg - p_str = 'CREATE\s+(ORDERED\s)*\s*AGGREGATE\s+%s\.(%s)\s*\(\s*%s\)(.*?);' % ( - self._schema.upper(), uda, p_arg_str) - pattern = re.compile(p_str, re.DOTALL | re.IGNORECASE); - self._sql = re.sub(pattern, '', self._sql) - - """ - @brief Remove "drop function" statements and rewrite "create function" - statements in the sql script - @note We don't drop any function - """ def _clean_function(self): + """ + @brief Remove "drop function" statements and rewrite "create function" + statements in the sql script + @note We don't drop any function + """ # remove 'drop function' - pattern = re.compile(r"""DROP(\s+)FUNCTION(.*?);""", re.DOTALL | re.IGNORECASE); + pattern = re.compile(r"""DROP(\s+)FUNCTION(.*?);""", re.DOTALL | re.IGNORECASE) self._sql = re.sub(pattern, '', self._sql) # replace 'create function' with 'create or replace function' - pattern = re.compile(r"""CREATE(\s+)FUNCTION""", re.DOTALL | re.IGNORECASE); + pattern = re.compile(r"""CREATE(\s+)FUNCTION""", re.DOTALL | re.IGNORECASE) self._sql = re.sub(pattern, 'CREATE OR REPLACE FUNCTION', self._sql) - """ - @brief Entry function for cleaning the sql script - """ def cleanup(self, sql): + """ + @brief Entry function for cleaning the sql script + """ self._sql = sql self._clean_comment() self._clean_type() diff --git a/src/ports/postgres/modules/regress/linear.py_in b/src/ports/postgres/modules/regress/linear.py_in index a524c653b..38aacaf25 100644 --- a/src/ports/postgres/modules/regress/linear.py_in +++ b/src/ports/postgres/modules/regress/linear.py_in @@ -9,7 +9,7 @@ from utilities.utilities import __unique_string from utilities.validate_args import table_exists from utilities.validate_args import columns_exist_in_table from utilities.validate_args import table_is_empty -from utilities.utilities import _assert +from utilities.utilities import _assert # ---------------------------------------------------------------------- def linregr_train(schema_madlib, source_table, out_table, @@ -20,17 +20,17 @@ def linregr_train(schema_madlib, source_table, out_table, """select setting from pg_settings where name='client_min_messages'""")[0]['setting'] plpy.execute("set client_min_messages to error") - + _validate_args(schema_madlib, source_table, out_table, dependent_varname, independent_varname, grouping_cols, heteroskedasticity_option) group_str = '' if grouping_cols is None else 'GROUP BY %s' % grouping_cols group_str_sel = '' if grouping_cols is None else grouping_cols + ',' - join_str = ',' if grouping_cols is None else 'JOIN' + join_str = ',' if grouping_cols is None else 'JOIN' using_str = '' if grouping_cols is None else 'USING (%s)' % grouping_cols - # Run linear regression + # Run linear regression temp_lin_rst = __unique_string() plpy.execute( """ @@ -46,12 +46,12 @@ def linregr_train(schema_madlib, source_table, out_table, {source_table} {group_str} """.format(schema_madlib=schema_madlib, - temp_lin_rst=temp_lin_rst, - group_str=group_str, - group_str_sel=group_str_sel, - dependent_varname=dependent_varname, - independent_varname=independent_varname, - source_table=source_table)) + temp_lin_rst=temp_lin_rst, + group_str=group_str, + group_str_sel=group_str_sel, + dependent_varname=dependent_varname, + independent_varname=independent_varname, + source_table=source_table)) # Run heteroskedasticity test if heteroskedasticity_option: @@ -65,18 +65,18 @@ def linregr_train(schema_madlib, source_table, out_table, {schema_madlib}.heteroskedasticity_test_linregr( {dependent_varname}, {independent_varname}, - (lin_rst).coef) AS hsk_rst 
+ (lin_rst).coef) AS hsk_rst FROM {source_table} {join_str} {temp_lin_rst} {using_str} {group_str} """.format(schema_madlib=schema_madlib, - temp_hsk_rst=temp_hsk_rst, - dependent_varname=dependent_varname, - independent_varname=independent_varname, - group_str_sel=group_str_sel, group_str=group_str, - join_str=join_str, using_str=using_str, - source_table=source_table, temp_lin_rst=temp_lin_rst)) - + temp_hsk_rst=temp_hsk_rst, + dependent_varname=dependent_varname, + independent_varname=independent_varname, + group_str_sel=group_str_sel, group_str=group_str, + join_str=join_str, using_str=using_str, + source_table=source_table, temp_lin_rst=temp_lin_rst)) + # Output the results join_str = '' using_str = '' @@ -105,7 +105,7 @@ def linregr_train(schema_madlib, source_table, out_table, CASE WHEN (lin.lin_rst).num_processed IS NULL THEN 0 ELSE (lin.lin_rst).num_processed - END AS num_rows_processed, + END AS num_rows_processed, CASE WHEN (lin.lin_rst).num_processed IS NULL THEN lin.num_rows ELSE lin.num_rows - (lin.lin_rst).num_processed @@ -127,14 +127,14 @@ def _validate_args(schema_madlib, source_table, out_table, dependent_varname, @brief validate the arguments """ _assert(source_table is not None and - source_table.strip().lower() not in ('null', ''), + source_table.strip().lower() not in ('null', ''), "Linregr error: Invalid data table name!") _assert(table_exists(source_table), "Linregr error: Data table does not exist!") _assert(not table_is_empty(source_table), "Linregr error: Data table is empty!") - _assert(out_table is not None and + _assert(out_table is not None and out_table.strip().lower() not in ('null', ''), "Linregr error: Invalid output table name!") _assert(not table_exists(out_table), @@ -159,3 +159,110 @@ def _validate_args(schema_madlib, source_table, out_table, dependent_varname, heteroskedasticity_option in (True, False), "Linregr error: Invalid heteroskedasticity_option") +# ------------------------------------------------------------------------------ +# -- Online help function ------------------------------------------------------ +# ------------------------------------------------------------------------------ + +def linregr_help_message(schema_madlib, message, **kwargs): + """ Help message for Linear Regression + + @brief + Args: + @param schema_madlib string, Name of the schema madlib + @param message string, Help message indicator + + Returns: + String. Contain the help message string + """ + if not message: + help_string = """ + ----------------------------------------------------------------------- + SUMMARY + ----------------------------------------------------------------------- + Ordinary Least Squares Regression, also called Linear Regression, is a + statistical model used to fit linear models. + + It models a linear relationship of a scalar dependent variable \f$ y \f$ to one + or more explanatory independent variables \f$ x \f$ to build a + model of coefficients. 
+
+        For more details on function usage:
+        SELECT {schema_madlib}.linregr_train('usage')
+
+        For an example on using the function:
+        SELECT {schema_madlib}.linregr_train('example')
+        """
+    elif message in ['usage', 'help', '?']:
+        help_string = """
+        -----------------------------------------------------------------------
+        USAGE
+        -----------------------------------------------------------------------
+        SELECT {schema_madlib}.linregr_train(
+            source_table,              -- name of input table
+            out_table,                 -- name of output table
+            dependent_varname,         -- name of dependent variable
+            independent_varname,       -- name of independent variable
+            grouping_cols,             -- names of columns to group-by
+            heteroskedasticity_option  -- perform heteroskedasticity test?
+        );
+
+        -----------------------------------------------------------------------
+        OUTPUT
+        -----------------------------------------------------------------------
+        The output table ('out_table' above) has the following columns:
+            <...>,                          -- Grouping columns used during training
+            'coef' DOUBLE PRECISION[],      -- Vector of coefficients
+            'r2' DOUBLE PRECISION,          -- R-squared coefficient
+            'std_err' DOUBLE PRECISION[],   -- Standard errors of coefficients
+            't_stats' DOUBLE PRECISION[],   -- t-stats of the coefficients
+            'p_values' DOUBLE PRECISION[],  -- p-values of the coefficients
+            'condition_no' INTEGER,         -- The condition number of the covariance matrix.
+            'bp_stats' DOUBLE PRECISION,    -- The Breusch-Pagan statistic of heteroskedasticity
+                                               (if heteroskedasticity_option=TRUE)
+            'bp_p_value' DOUBLE PRECISION   -- The Breusch-Pagan calculated p-value
+                                               (if heteroskedasticity_option=TRUE)
+        """
+    elif message in ['example', 'examples']:
+        help_string = """
+        CREATE TABLE houses (id INT, tax INT,
+                             bedroom INT, bath FLOAT,
+                             price INT, size INT, lot INT);
+        COPY houses FROM STDIN WITH DELIMITER '|';
+          1 |  590 | 2 |   1 |  50000 |  770 | 22100
+          2 | 1050 | 3 |   2 |  85000 | 1410 | 12000
+          3 |   20 | 3 |   1 |  22500 | 1060 |  3500
+          4 |  870 | 2 |   2 |  90000 | 1300 | 17500
+          5 | 1320 | 3 |   2 | 133000 | 1500 | 30000
+          6 | 1350 | 2 |   1 |  90500 |  820 | 25700
+          7 | 2790 | 3 | 2.5 | 260000 | 2130 | 25000
+          8 |  680 | 2 |   1 | 142500 | 1170 | 22000
+          9 | 1840 | 3 |   2 | 160000 | 1500 | 19000
+         10 | 3680 | 4 |   2 | 240000 | 2790 | 20000
+         11 | 1660 | 3 |   1 |  87000 | 1030 | 17500
+         12 | 1620 | 3 |   2 | 118600 | 1250 | 20000
+         13 | 3100 | 3 |   2 | 140000 | 1760 | 38000
+         14 | 2070 | 2 |   3 | 148000 | 1550 | 14000
+         15 |  650 | 3 | 1.5 |  65000 | 1450 | 12000
+        \.
+
+        -- Train a regression model. First, a single regression for all data.
+        SELECT {schema_madlib}.linregr_train( 'houses',
+                                              'houses_linregr',
+                                              'price',
+                                              'ARRAY[1, tax, bath, size]'
+                                            );
+        -- Generate three output models, one for each value of "bedroom".
+        SELECT {schema_madlib}.linregr_train( 'houses',
+                                              'houses_linregr_bedroom',
+                                              'price',
+                                              'ARRAY[1, tax, bath, size]',
+                                              'bedroom'
+                                            );
+        -- Examine the resulting models.
+        SELECT * FROM houses_linregr;
+        SELECT * FROM houses_linregr_bedroom;
+        """
+    else:
+        help_string = "No such option. Use {schema_madlib}.linregr_train()"
+
+    return help_string.format(schema_madlib=schema_madlib)
\ No newline at end of file
diff --git a/src/ports/postgres/modules/regress/linear.sql_in b/src/ports/postgres/modules/regress/linear.sql_in
index c1b7d93b9..5a5d98141 100644
--- a/src/ports/postgres/modules/regress/linear.sql_in
+++ b/src/ports/postgres/modules/regress/linear.sql_in
@@ -35,9 +35,10 @@ model of coefficients.
 
 @anchor train
 @par Training Function
+
 The linear regression training function has the following syntax.
-linregr_train( source_table, 
+linregr_train( source_table,
                out_table,
                dependent_varname,
                independent_varname,
@@ -45,13 +46,14 @@ linregr_train( source_table,
                heteroskedasticity_option
              )
 
+ \b Arguments
source_table
TEXT. The name of the table containing the training data.
out_table
-
TEXT. Name of the generated table containing the output model. +
TEXT. Name of the generated table containing the output model. The output table contains the following columns. @@ -116,6 +118,19 @@ linregr_train( source_table,
BOOLEAN, default: FALSE. When TRUE, the heteroskedasticity of the model is also calculated and returned with the results.
+@anchor warning +@warning The aggregate 'linregr' has been deprecated in favor of the function +'linregr_train'. If the aggregate 'linregr' is used to output the results of +linear regression to a table, it is recommended to follow the general pattern +shown below (replace text within '<...>' with the appropriate variable names). +
+CREATE TABLE \<out_table\> AS
+SELECT (r).*
+FROM (
+    SELECT linregr(\<dependent_varname\>, \<independent_varname\>) as r
+    FROM \<source_table\>
+    ) q;
+
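For applications that need to run this migration programmatically rather than from psql, a minimal sketch of the same pattern through psycopg2 (the connection settings are placeholders; table and column names are borrowed from the houses example later in this file):

    import psycopg2

    # Materialize the deprecated aggregate's composite result into a table,
    # mirroring the recommended CREATE TABLE ... SELECT (r).* pattern above.
    conn = psycopg2.connect(dbname="testdb")  # placeholder connection settings
    with conn:  # commits on success
        with conn.cursor() as cur:
            cur.execute("""
                CREATE TABLE houses_linregr AS
                SELECT (r).*
                FROM (SELECT madlib.linregr(price, ARRAY[1, tax, bath, size]) AS r
                      FROM houses) q
            """)
    conn.close()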
@anchor predict @@ -157,24 +172,24 @@ COPY houses FROM STDIN WITH DELIMITER '|'; 15 | 650 | 3 | 1.5 | 65000 | 1450 | 12000 \\. --# Train a regression model. First, a single regression for all the data. -
 
+-# Train a regression model. First, a single regression for all the data.
+
 SELECT madlib.linregr_train( 'houses',
-                             'houses_linregr', 
-                             'price', 
+                             'houses_linregr',
+                             'price',
                              'ARRAY[1, tax, bath, size]'
                            );
-
+
-# Generate three output models, one for each value of "bedroom".
 SELECT madlib.linregr_train( 'houses',
-                             'houses_linregr_bedroom', 
-                             'price', 
+                             'houses_linregr_bedroom',
+                             'price',
                              'ARRAY[1, tax, bath, size]',
                              'bedroom'
                            );
 
--# Examine the resulting models. +-# Examine the resulting models.
 -- Set extended display on for easier reading of output
 \\x ON
@@ -210,7 +225,7 @@ coef         | {0.0112536020318378,41.4132554771633,0.0225072040636757,31.397549
 r2           | 1
 std_err      | {0,0,0,0}
 t_stats      | {Infinity,Infinity,Infinity,Infinity}
-p_values     | 
+p_values     |
 condition_no | Infinity
 -[ RECORD 3 ]+--------------------------------------------------------------------------
 bedroom      | 3
@@ -237,7 +252,7 @@ SELECT houses.*,
        madlib.linregr_predict( ARRAY[1,tax,bath,size],
                                m.coef
                              ) as predict,
-        price - 
+        price -
           madlib.linregr_predict( ARRAY[1,tax,bath,size],
                                   m.coef
                                 ) as residual
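For intuition about the query above: linregr_predict is the inner product of the fitted coefficient vector with the independent-variable array, and the residual is the observed price minus that prediction. A pure-Python sketch (the coefficient values are illustrative, not taken from the output above):

    def linregr_predict(coef, x):
        # Prediction = inner product of coefficients and features.
        assert len(coef) == len(x)
        return sum(c * xi for c, xi in zip(coef, x))

    coef = [27923.0, 53.9, -21648.0, 66.4]   # illustrative coefficients
    features = [1, 590, 1.0, 770]            # ARRAY[1, tax, bath, size], house 1
    print(linregr_predict(coef, features))   # residual = 50000 - this value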
@@ -374,7 +389,7 @@ http://en.wikipedia.org/wiki/Heteroscedasticity-consistent_standard_errors
 
 
 @anchor related
-@par Related Topics 
+@par Related Topics
 
 @ref grp_robust
 
@@ -391,6 +406,70 @@ File linear.sql_in, source file for the SQL functions
 @endinternal
 */
 
+---------------------------------------------------------------------------
+-- User facing functions
+---------------------------------------------------------------------------
+/**
+  * @brief Linear regression training function with grouping support.
+ **/
+CREATE FUNCTION MADLIB_SCHEMA.linregr_train(
+    source_table                VARCHAR,    -- name of input  table
+    out_table                   VARCHAR,    -- name of output table
+    dependent_varname           VARCHAR,    -- name of dependent variable
+    independent_varname         VARCHAR,    -- name of independent variable
+    grouping_cols               VARCHAR,    -- names of columns to group-by
+    heteroskedasticity_option   BOOLEAN     -- do heteroskedasticity test or not
+) RETURNS VOID AS $$
+PythonFunction(regress, linear, linregr_train)
+$$ LANGUAGE plpythonu;
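PythonFunction(regress, linear, linregr_train) is an m4 macro expanded at build time. The generated plpythonu body has roughly the following shape (a simplified sketch, not the literal expansion; the library path is a placeholder):

    # plpythonu wraps this body in a function, so the bare 'return' and the
    # SQL argument names are valid here; schema_madlib is resolved by the
    # generated code from the schema the function is installed in.
    import sys
    sys.path.insert(1, "/usr/local/madlib/ports/postgres/modules")  # placeholder
    from regress import linear
    return linear.linregr_train(schema_madlib, source_table, out_table,
                                dependent_varname, independent_varname,
                                grouping_cols, heteroskedasticity_option)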
+
+CREATE FUNCTION MADLIB_SCHEMA.linregr_train(
+    source_table                VARCHAR,    -- name of input  table
+    out_table                   VARCHAR,    -- name of output table
+    dependent_varname           VARCHAR,    -- name of dependent variable
+    independent_varname         VARCHAR,    -- name of independent variable
+    grouping_cols               VARCHAR     -- names of columns to group-by
+) RETURNS VOID AS $$
+    SELECT MADLIB_SCHEMA.linregr_train($1, $2, $3, $4, $5, FALSE);
+$$ LANGUAGE sql;
+
+CREATE FUNCTION MADLIB_SCHEMA.linregr_train(
+    source_table                VARCHAR,    -- name of input  table
+    out_table                   VARCHAR,    -- name of output table
+    dependent_varname           VARCHAR,    -- name of dependent variable
+    independent_varname         VARCHAR     -- name of independent variable
+) RETURNS VOID AS $$
+    SELECT MADLIB_SCHEMA.linregr_train($1, $2, $3, $4, NULL, FALSE);
+$$ LANGUAGE sql;
+---------------------------------------------------------------------------
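The two shorter overloads above emulate optional arguments by forwarding fixed defaults to the six-argument form, likely because the oldest supported backends (e.g. Greenplum's PostgreSQL 8.2 base) lack parameter DEFAULTs. In Python terms the chain collapses to this sketch:

    # Omitted arguments fall back to grouping_cols=None and
    # heteroskedasticity_option=False, as in the SQL overloads.
    def linregr_train(source_table, out_table, dependent_varname,
                      independent_varname, grouping_cols=None,
                      heteroskedasticity_option=False):
        print("train %s -> %s" % (source_table, out_table))  # stand-in body

    linregr_train('houses', 'houses_linregr', 'price',
                  'ARRAY[1, tax, bath, size]')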
+
+-----------------------------------------------------------------------
+-- Online help function
+-----------------------------------------------------------------------
+
+----------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linregr_train()
+RETURNS VARCHAR AS $$
+BEGIN
+    RETURN MADLIB_SCHEMA.linregr_train('');
+END;
+$$ LANGUAGE plpgsql VOLATILE;
+
+----------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linregr_train(
+    message VARCHAR     -- usage string
+)
+RETURNS VARCHAR AS $$
+PythonFunction(regress, linear, linregr_help_message)
+$$ LANGUAGE plpythonu VOLATILE;
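The zero- and one-argument forms surface the online help implemented in linear.py_in. A quick way to read it from Python (psycopg2; the connection settings and the 'madlib' schema name are assumptions):

    import psycopg2

    conn = psycopg2.connect(dbname="testdb")  # placeholder settings
    cur = conn.cursor()
    for msg in ("", "usage", "example"):
        cur.execute("SELECT madlib.linregr_train(%s)", (msg,))
        print(cur.fetchone()[0])  # summary, usage, then example help text
    conn.close()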
+
+----------------------------------------------------------------------
+
+
+-- Deprecated functions --------------------------------------------------------
+
 ---------------------------------------------------------------------------
 -- Result Types
 ---------------------------------------------------------------------------
@@ -556,42 +635,3 @@ CREATE AGGREGATE MADLIB_SCHEMA.heteroskedasticity_test_linregr(
     INITCOND=''
 );
 ---------------------------------------------------------------------------
-
-
----------------------------------------------------------------------------
--- User facing functions
----------------------------------------------------------------------------
-/**
-  * @brief Linear regression training function with grouping support.
- **/
-CREATE FUNCTION MADLIB_SCHEMA.linregr_train(
-    source_table                VARCHAR,    -- name of input  table
-    out_table                   VARCHAR,    -- name of output table
-    dependent_varname           VARCHAR,    -- name of dependent variable
-    independent_varname         VARCHAR,    -- name of independent variable
-    grouping_cols               VARCHAR,    -- names of columns to group-by
-    heteroskedasticity_option   BOOLEAN     -- do heteroskedasticity test or not
-) RETURNS VOID AS $$
-PythonFunction(regress, linear, linregr_train)
-$$ LANGUAGE plpythonu;
-
-CREATE FUNCTION MADLIB_SCHEMA.linregr_train(
-    source_table                VARCHAR,    -- name of input  table
-    out_table                   VARCHAR,    -- name of output table
-    dependent_varname           VARCHAR,    -- name of dependent variable
-    independent_varname         VARCHAR,    -- name of independent variable
-    grouping_cols               VARCHAR     -- names of columns to group-by
-) RETURNS VOID AS $$
-    SELECT MADLIB_SCHEMA.linregr_train($1, $2, $3, $4, $5, FALSE);
-$$ LANGUAGE sql;
-
-CREATE FUNCTION MADLIB_SCHEMA.linregr_train(
-    source_table                VARCHAR,    -- name of input  table
-    out_table                   VARCHAR,    -- name of output table
-    dependent_varname           VARCHAR,    -- name of dependent variable
-    independent_varname         VARCHAR     -- name of independent variable
-) RETURNS VOID AS $$
-    SELECT MADLIB_SCHEMA.linregr_train($1, $2, $3, $4, NULL, FALSE);
-$$ LANGUAGE sql;
----------------------------------------------------------------------------
-
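A note on why the deprecated CREATE AGGREGATE statements can remain in this file: during an upgrade, ScriptCleaner in upgrade_util.py strips every CREATE AGGREGATE whose signature already exists and is not listed in the changelist, so only changed objects are recreated. A self-contained sketch of that pattern matching (the schema is fixed to MADLIB and the aggregate body is abridged for the demo):

    import re

    # An unchanged aggregate, roughly as it appears in the new linear.sql_in.
    sql = """
    CREATE AGGREGATE MADLIB.heteroskedasticity_test_linregr(
        double precision, double precision[], double precision[]) (
        STYPE=MADLIB.bytea8,
        INITCOND=''
    );
    """

    # The kind of pattern _get_all_aggregate_patterns() builds: type synonyms
    # come from _rewrite_type_in(), brackets are escaped, match is case-blind.
    arg = r"(float8|double precision)"
    pat = re.compile(
        r"CREATE\s+(ORDERED\s)*\s*AGGREGATE\s+MADLIB\."
        r"(heteroskedasticity_test_linregr)\s*\(\s*"
        + arg + r"\s*,\s*" + arg + r"\[\]\s*,\s*" + arg + r"\[\]\s*\)(.*?);",
        re.DOTALL | re.IGNORECASE)

    print(re.sub(pat, '', sql).strip())  # prints '' -- the statement was removed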
diff --git a/src/ports/postgres/modules/regress/test/linear.sql_in b/src/ports/postgres/modules/regress/test/linear.sql_in
index 35bb37daa..68ae5d15a 100644
--- a/src/ports/postgres/modules/regress/test/linear.sql_in
+++ b/src/ports/postgres/modules/regress/test/linear.sql_in
@@ -51,6 +51,7 @@ SELECT assert(
 ) q;
 
 
+
 /*
  * The following example is taken from:
  * http://biocomp.health.unm.edu/biomed505/Course/Cheminformatics/advanced/data_classification_qsar/linear_multilinear_regression.pdf
@@ -155,10 +156,6 @@ SELECT assert(
     ) AS linregr
 ) ignored;
 
-
-
-
-
 ------------------------------------------------------------------------
 
 select linregr_train('houses', 'result_lin_houses', 'price',
@@ -194,3 +191,7 @@ from result_lin_houses
 where bedroom = 3;
 
 drop table if exists result_lin_houses cascade;
+
+select linregr_train();
+select linregr_train('usage');
+select linregr_train('example');
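Stepping back from the per-module diffs: the heart of the new multi-version support is the changelist dispatch in ChangeHandler._load(). It reduces to the following, which mirrors the code in this patch and runs standalone:

    def pick_changelist(mad_dbrev):
        # Choose the changelist spanning the installed version to v1.3,
        # as ChangeHandler._load() does above.
        if float(mad_dbrev) < 1.1:
            return 'changelist_1.0_1.3.yaml'
        elif float(mad_dbrev) < 1.2:
            return 'changelist_1.1_1.3.yaml'
        return 'changelist.yaml'  # v1.2 -> v1.3

    assert pick_changelist('1.0') == 'changelist_1.0_1.3.yaml'
    assert pick_changelist('1.1') == 'changelist_1.1_1.3.yaml'
    assert pick_changelist('1.2') == 'changelist.yaml'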
diff --git a/src/ports/postgres/modules/regress/test/logistic.sql_in b/src/ports/postgres/modules/regress/test/logistic.sql_in
index a4f1c81a7..77e3a8709 100644
--- a/src/ports/postgres/modules/regress/test/logistic.sql_in
+++ b/src/ports/postgres/modules/regress/test/logistic.sql_in
@@ -727,7 +727,7 @@ INSERT INTO all_null_patients(id, second_attack, treatment, trait_anxiety) VALUE
 (1, NULL, 0, 60);
 
 drop table if exists temp_result;
-select madlib.logregr_train(
+select logregr_train(
     'all_null_patients',
     'temp_result',
     'second_attack',