Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of OptionChains DF #8359

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Algorithm.Python/OptionChainFullDataRegressionAlgorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def initialize(self):

# Get the contract with the latest expiration date.
# Note: the result of df.loc[] is a series, and its name is a tuple with a single element (contract symbol)
self._option_contract = contracts.loc[contracts.expiry.idxmax()].name[0]
self._option_contract = contracts.loc[contracts.expiry.idxmax()].name

self.add_option_contract(self._option_contract)

Expand Down
2 changes: 1 addition & 1 deletion Common/Data/Market/OptionChains.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private PyObject InitializeDataFrame()
var dataFrames = this.Select(kvp => kvp.Value.DataFrame).ToList();
var canonicalSymbols = this.Select(kvp => kvp.Key);

return PandasConverter.ConcatDataFrames(dataFrames, keys: canonicalSymbols, names: _indexNames);
return PandasConverter.ConcatDataFrames(dataFrames, keys: canonicalSymbols, names: _indexNames, sort: false);
}
}
}
4 changes: 4 additions & 0 deletions Common/Python/PandasConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ public PyObject GetDataFrame<T>(IEnumerable<T> data, bool symbolOnlyIndex = fals
pandasData.Add(datum);
}

if (symbolOnlyIndex)
{
return PandasData.ToPandasDataFrame(pandasDataBySymbol.Values);
}
return CreateDataFrame(pandasDataBySymbol,
// Use 2 instead of maxLevels for backwards compatibility
maxLevels: symbolOnlyIndex ? 1 : 2,
Expand Down
58 changes: 58 additions & 0 deletions Common/Python/PandasData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ public class PandasData
private static PyObject _dataFrameFactory;
private static PyObject _multiIndexFactory;
private static PyObject _multiIndex;
private static PyObject _indexFactory;

private static PyList _defaultNames;
private static PyList _level1Names;
Expand Down Expand Up @@ -141,6 +142,7 @@ static PandasData()
_dataFrameFactory = _pandas.GetAttr("DataFrame");
_multiIndex = _pandas.GetAttr("MultiIndex");
_multiIndexFactory = _multiIndex.GetAttr("from_tuples");
_indexFactory = _pandas.GetAttr("Index");
_empty = new PyString(string.Empty);

var time = new PyString("time");
Expand Down Expand Up @@ -524,6 +526,62 @@ public PyObject ToPandasDataFrame(int levels = 2, bool filterMissingValueColumns
return result;
}

/// <summary>
/// Helper method to create a single pandas data frame indexed by symbol
/// </summary>
/// <remarks>
/// Will add a single point per pandas data series (symbol): only the first value of each
/// series is used, and None is used where a series has no values (or is missing for a symbol).
/// Columns that end up holding only NaN or None values are dropped from the result.
/// </remarks>
/// <param name="pandasDatas">The pandas data instances; each one contributes one row, keyed by its symbol</param>
/// <returns>The resulting pandas data frame. The caller owns the returned object</returns>
public static PyObject ToPandasDataFrame(IEnumerable<PandasData> pandasDatas)
{
    using var gil = Py.GIL();

    // The source is walked twice (once for the index, once for the values). Materialize it once so a
    // deferred or changing sequence cannot produce an index that disagrees with the rows.
    var rows = pandasDatas as IReadOnlyCollection<PandasData> ?? pandasDatas.ToList();

    using var symbols = rows.Select(x => x._symbol).ToPyListUnSafe();

    using var namesDic = Py.kw("name", _level1Names[0]);
    using var index = _indexFactory.Invoke(new[] { symbols }, namesDic);

    var valuesPerSeries = new Dictionary<string, PyList>();
    try
    {
        var processedRows = 0;
        foreach (var pandasData in rows)
        {
            var appended = 0;
            foreach (var kvp in pandasData._series)
            {
                if (!valuesPerSeries.TryGetValue(kvp.Key, out var column))
                {
                    // Adds pandas.Series value keyed by the column name
                    column = valuesPerSeries[kvp.Key] = new PyList();

                    // Column first seen on a later row: back-fill the earlier rows so it stays aligned with the index
                    for (var i = 0; i < processedRows; i++)
                    {
                        column.Append(PyObject.None);
                    }
                }

                if (kvp.Value.Values.Count > 0)
                {
                    // taking only 1 value per symbol
                    using var valueOfSymbol = kvp.Value.Values[0].ToPython();
                    column.Append(valueOfSymbol);
                }
                else
                {
                    column.Append(PyObject.None);
                }
                appended++;
            }
            processedRows++;

            // Rare path: this row did not provide every known column (assumes series keys are unique per row).
            // Pad the short columns so every column has exactly one entry per index element.
            if (appended != valuesPerSeries.Count)
            {
                foreach (var shortColumn in valuesPerSeries.Values)
                {
                    for (var length = shortColumn.Length(); length < processedRows; length++)
                    {
                        shortColumn.Append(PyObject.None);
                    }
                }
            }
        }

        using var pyDict = new PyDict();
        foreach (var kvp in valuesPerSeries)
        {
            using var series = _seriesFactory.Invoke(kvp.Value, index);
            using var pyStrKey = kvp.Key.ToPython();
            using var pyKey = _pandasColumn.Invoke(pyStrKey);
            pyDict.SetItem(pyKey, series);
        }

        var result = _dataFrameFactory.Invoke(pyDict);
        try
        {
            // Drop columns with only NaN or None values
            using var dropnaKwargs = Py.kw("axis", 1, "inplace", true, "how", "all");
            using var dropna = result.GetAttr("dropna");
            using var dropnaResult = dropna.Invoke(Array.Empty<PyObject>(), dropnaKwargs);
        }
        catch
        {
            // Do not leak the data frame if the clean-up step fails
            result.Dispose();
            throw;
        }

        return result;
    }
    finally
    {
        // Release the intermediate python lists on every path, including failures
        foreach (var column in valuesPerSeries.Values)
        {
            column.Dispose();
        }
    }
}

/// <summary>
/// Only dispose of the PyObject if it was set to something different than empty
/// </summary>
Expand Down
40 changes: 20 additions & 20 deletions Tests/Algorithm/AlgorithmChainsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,30 +88,30 @@ def get_option_chain_data_from_dataframe(algorithm, canonical):

# Will make it more complex than it needs to be,
# just so that we can test indexing by symbol using df.loc[]
for (symbol,) in option_chain_df.index:
symbol_data = option_chain_df.loc[(symbol)]
for symbol in option_chain_df.index:
symbol_data = option_chain_df.loc[symbol]

if symbol_data.shape[0] != 1:
raise ValueError(f'Expected 1 row for {symbol}, got {symbol_data.shape[0]}')
if symbol_data.shape[0] != 21:
raise ValueError(f'Expected 21 row for {symbol}, got {symbol_data.shape[0]}')

yield {
'symbol': symbol,
'expiry': symbol_data['expiry'].values[0],
'strike': symbol_data['strike'].values[0],
'right': symbol_data['right'].values[0],
'style': symbol_data['style'].values[0],
'lastprice': symbol_data['lastprice'].values[0],
'askprice': symbol_data['askprice'].values[0],
'bidprice': symbol_data['bidprice'].values[0],
'openinterest': symbol_data['openinterest'].values[0],
'impliedvolatility': symbol_data['impliedvolatility'].values[0],
'delta': symbol_data['delta'].values[0],
'gamma': symbol_data['gamma'].values[0],
'vega': symbol_data['vega'].values[0],
'theta': symbol_data['theta'].values[0],
'rho': symbol_data['rho'].values[0],
'underlyingsymbol': symbol_data['underlyingsymbol'].values[0],
'underlyinglastprice': symbol_data['underlyinglastprice'].values[0],
'expiry': symbol_data['expiry'],
'strike': symbol_data['strike'],
'right': symbol_data['right'],
'style': symbol_data['style'],
'lastprice': symbol_data['lastprice'],
'askprice': symbol_data['askprice'],
'bidprice': symbol_data['bidprice'],
'openinterest': symbol_data['openinterest'],
'impliedvolatility': symbol_data['impliedvolatility'],
'delta': symbol_data['delta'],
'gamma': symbol_data['gamma'],
'vega': symbol_data['vega'],
'theta': symbol_data['theta'],
'rho': symbol_data['rho'],
'underlyingsymbol': symbol_data['underlyingsymbol'],
'underlyinglastprice': symbol_data['underlyinglastprice'],
}
");

Expand Down
Loading