Skip to content

Commit

Permalink
Improve performance of OptionChain (#8359)
Browse files Browse the repository at this point in the history
- Improve performance of OptionChain by creating a single pandas df
  • Loading branch information
Martin-Molinero authored Oct 4, 2024
1 parent e16b27f commit 0a2c05a
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Algorithm.Python/OptionChainFullDataRegressionAlgorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def initialize(self):

# Get the contract with the latest expiration date.
# Note: the result of df.loc[] is a series, and its name is a tuple with a single element (contract symbol)
self._option_contract = contracts.loc[contracts.expiry.idxmax()].name[0]
self._option_contract = contracts.loc[contracts.expiry.idxmax()].name

self.add_option_contract(self._option_contract)

Expand Down
2 changes: 1 addition & 1 deletion Common/Data/Market/OptionChains.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private PyObject InitializeDataFrame()
var dataFrames = this.Select(kvp => kvp.Value.DataFrame).ToList();
var canonicalSymbols = this.Select(kvp => kvp.Key);

return PandasConverter.ConcatDataFrames(dataFrames, keys: canonicalSymbols, names: _indexNames);
return PandasConverter.ConcatDataFrames(dataFrames, keys: canonicalSymbols, names: _indexNames, sort: false);
}
}
}
4 changes: 4 additions & 0 deletions Common/Python/PandasConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ public PyObject GetDataFrame<T>(IEnumerable<T> data, bool symbolOnlyIndex = fals
pandasData.Add(datum);
}

if (symbolOnlyIndex)
{
return PandasData.ToPandasDataFrame(pandasDataBySymbol.Values);
}
return CreateDataFrame(pandasDataBySymbol,
// Use 2 instead of maxLevels for backwards compatibility
maxLevels: symbolOnlyIndex ? 1 : 2,
Expand Down
58 changes: 58 additions & 0 deletions Common/Python/PandasData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ public class PandasData
private static PyObject _dataFrameFactory;
private static PyObject _multiIndexFactory;
private static PyObject _multiIndex;
private static PyObject _indexFactory;

private static PyList _defaultNames;
private static PyList _level1Names;
Expand Down Expand Up @@ -141,6 +142,7 @@ static PandasData()
_dataFrameFactory = _pandas.GetAttr("DataFrame");
_multiIndex = _pandas.GetAttr("MultiIndex");
_multiIndexFactory = _multiIndex.GetAttr("from_tuples");
_indexFactory = _pandas.GetAttr("Index");
_empty = new PyString(string.Empty);

var time = new PyString("time");
Expand Down Expand Up @@ -524,6 +526,62 @@ public PyObject ToPandasDataFrame(int levels = 2, bool filterMissingValueColumns
return result;
}

/// <summary>
/// Helper method to create a single pandas data frame indexed by symbol
/// </summary>
/// <remarks>Will add a single point per pandas data series (symbol): only the first value of each
/// series is used, and <c>None</c> is added for series that hold no values</remarks>
/// <param name="pandasDatas">The pandas data instances, one per symbol, used to build the data frame rows</param>
/// <returns>The pandas.DataFrame indexed by symbol, without the columns that only hold NaN or None values</returns>
public static PyObject ToPandasDataFrame(IEnumerable<PandasData> pandasDatas)
{
    using var _ = Py.GIL();

    // The source is walked twice (once for the index, once for the column values),
    // so materialize it once to guarantee both passes see the same items in the same order
    var dataPoints = pandasDatas as IReadOnlyCollection<PandasData> ?? pandasDatas.ToList();

    using var list = dataPoints.Select(x => x._symbol).ToPyListUnSafe();

    using var namesDic = Py.kw("name", _level1Names[0]);
    using var index = _indexFactory.Invoke(new[] { list }, namesDic);

    var valuesPerSeries = new Dictionary<string, PyList>();
    try
    {
        // NOTE(review): values are matched to the index by position, which assumes every
        // instance exposes the same series keys -- confirm against the callers
        foreach (var pandasData in dataPoints)
        {
            foreach (var kvp in pandasData._series)
            {
                if (!valuesPerSeries.TryGetValue(kvp.Key, out PyList value))
                {
                    // Adds pandas.Series value keyed by the column name
                    value = valuesPerSeries[kvp.Key] = new PyList();
                }

                if (kvp.Value.Values.Count > 0)
                {
                    // taking only 1 value per symbol
                    using var valueOfSymbol = kvp.Value.Values[0].ToPython();
                    value.Append(valueOfSymbol);
                }
                else
                {
                    value.Append(PyObject.None);
                }
            }
        }

        using var pyDict = new PyDict();
        foreach (var kvp in valuesPerSeries)
        {
            using var series = _seriesFactory.Invoke(kvp.Value, index);
            using var pyStrKey = kvp.Key.ToPython();
            using var pyKey = _pandasColumn.Invoke(pyStrKey);
            pyDict.SetItem(pyKey, series);
        }
        var result = _dataFrameFactory.Invoke(pyDict);

        // Drop columns with only NaN or None values.
        // Both the bound method and its return value are python references we own, so release them
        using var dropnaKwargs = Py.kw("axis", 1, "inplace", true, "how", "all");
        using var dropna = result.GetAttr("dropna");
        using var dropnaResult = dropna.Invoke(Array.Empty<PyObject>(), dropnaKwargs);

        return result;
    }
    finally
    {
        // Release the intermediate python lists even if building the data frame failed
        foreach (var values in valuesPerSeries.Values)
        {
            values.Dispose();
        }
    }
}

/// <summary>
/// Only dispose of the PyObject if it was set to something different than empty
/// </summary>
Expand Down
40 changes: 20 additions & 20 deletions Tests/Algorithm/AlgorithmChainsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,30 +88,30 @@ def get_option_chain_data_from_dataframe(algorithm, canonical):
# Will make it more complex than it needs to be,
# just so that we can test indexing by symbol using df.loc[]
for (symbol,) in option_chain_df.index:
symbol_data = option_chain_df.loc[(symbol)]
for symbol in option_chain_df.index:
symbol_data = option_chain_df.loc[symbol]
if symbol_data.shape[0] != 1:
raise ValueError(f'Expected 1 row for {symbol}, got {symbol_data.shape[0]}')
if symbol_data.shape[0] != 21:
raise ValueError(f'Expected 21 row for {symbol}, got {symbol_data.shape[0]}')
yield {
'symbol': symbol,
'expiry': symbol_data['expiry'].values[0],
'strike': symbol_data['strike'].values[0],
'right': symbol_data['right'].values[0],
'style': symbol_data['style'].values[0],
'lastprice': symbol_data['lastprice'].values[0],
'askprice': symbol_data['askprice'].values[0],
'bidprice': symbol_data['bidprice'].values[0],
'openinterest': symbol_data['openinterest'].values[0],
'impliedvolatility': symbol_data['impliedvolatility'].values[0],
'delta': symbol_data['delta'].values[0],
'gamma': symbol_data['gamma'].values[0],
'vega': symbol_data['vega'].values[0],
'theta': symbol_data['theta'].values[0],
'rho': symbol_data['rho'].values[0],
'underlyingsymbol': symbol_data['underlyingsymbol'].values[0],
'underlyinglastprice': symbol_data['underlyinglastprice'].values[0],
'expiry': symbol_data['expiry'],
'strike': symbol_data['strike'],
'right': symbol_data['right'],
'style': symbol_data['style'],
'lastprice': symbol_data['lastprice'],
'askprice': symbol_data['askprice'],
'bidprice': symbol_data['bidprice'],
'openinterest': symbol_data['openinterest'],
'impliedvolatility': symbol_data['impliedvolatility'],
'delta': symbol_data['delta'],
'gamma': symbol_data['gamma'],
'vega': symbol_data['vega'],
'theta': symbol_data['theta'],
'rho': symbol_data['rho'],
'underlyingsymbol': symbol_data['underlyingsymbol'],
'underlyinglastprice': symbol_data['underlyinglastprice'],
}
");

Expand Down

0 comments on commit 0a2c05a

Please sign in to comment.