Skip to content

Commit

Permalink
Add cloud user and system account group by (#797)
Browse files Browse the repository at this point in the history
* adding person and username group bys for cloud data and pipeline for adding people and usernames. adding updated regression and integration tests

* removing unneeded migration code. adding function write out partial config files

* update config migration script and remove unneeded use statements from database migration file

* documentation updates and remove erroneous commits

* formatting changes

* removing unneeded function from user and systemaccount group bys

* moving generation of event_id to event table instead of staging table

* added comments and changed query for event_asset table when dealing with generic cloud data to account for event_id being generated on the event table instead of generic_cloud_staging_event

* adding truncate_destination to etl.d file instead of truncate statement in post_ingest_updates

* moving post ingest sql update action back to original location in pipeline file

* documentation updates

* fixing style issues and updating test artifacts for unit tests

* fixes for passing unit and style tests

* remove extra spaces to pass unit tests

* adding new group bys to test artifact

* addressing comments from @jpwhite4

* removing defaults from etl pipeline files

* updating tests

* updating jobs_cloud_generic to be correct

* updating cloud person and username tests to use updated anonymized data

* removing unnecessary function to write out partial config files. replace with using JSON::savefile

* re-adding hide_sql_warning_codes in jobs_cloud_generic that were added by Ben in a previous PR

* changing $value to $unused to pass linter
  • Loading branch information
Greg Dean authored Mar 4, 2019
1 parent b1363cb commit 4f28be4
Show file tree
Hide file tree
Showing 208 changed files with 2,700 additions and 147 deletions.
25 changes: 0 additions & 25 deletions classes/DataWarehouse/Query/Cloud/GroupBys/GroupByPerson.php
Original file line number Diff line number Diff line change
Expand Up @@ -128,29 +128,4 @@ public function pullQueryParameterDescriptions(&$request)
"select long_name as field_label from modw.person where id in (_filter_) order by order_id"
);
}

/**
 * Look up the possible values of this group by, narrowed by the supplied
 * parameters.
 *
 * Starting from $this->_possible_values_query, each recognised parameter
 * ('person', 'provider', 'institution', 'pi') splices extra tables and/or
 * conditions into the SQL text by replacing its 'from ' / 'where ' fragments.
 * Unrecognised parameter names leave the query untouched. The resulting query
 * is handed to the parent implementation as its fifth argument.
 *
 * NOTE(review): parameter values are interpolated into the SQL text as-is;
 * confirm that callers only pass trusted / numeric values.
 *
 * @param mixed $hint       passed through to the parent implementation
 * @param mixed $limit      passed through to the parent implementation
 * @param mixed $offset     passed through to the parent implementation
 * @param array $parameters map of parameter name => value used to narrow the query
 *
 * @return mixed an empty array when no possible-values query is configured,
 *               otherwise whatever the parent implementation returns
 */
public function getPossibleValues($hint = null, $limit = null, $offset = null, array $parameters = array())
{
    // Nothing to narrow when the group by has no possible-values query.
    if ($this->_possible_values_query == null) {
        return array();
    }

    $sql = $this->_possible_values_query;

    foreach ($parameters as $name => $pvalue) {
        switch ($name) {
            case 'person':
                $sql = str_ireplace('where ', "where gt.id = $pvalue and ", $sql);
                break;

            case 'provider':
                // Restrict to people holding an account on a resource of the provider.
                $sql = str_ireplace('from ', "from modw.systemaccount sa, modw.resourcefact rf, ", $sql);
                $sql = str_ireplace('where ', "where rf.id = sa.resource_id and rf.organization_id = $pvalue and gt.id = sa.person_id and ", $sql);
                break;

            case 'institution':
                $sql = str_ireplace('where ', "where gt.organization_id = $pvalue and ", $sql);
                break;

            case 'pi':
                $sql = str_ireplace('from ', "from modw.peopleunderpi pup, ", $sql);
                $sql = str_ireplace('where ', "where pup.principalinvestigator_person_id = $pvalue and gt.id = pup.person_id and ", $sql);
                break;
        }
    }

    return parent::getPossibleValues($hint, $limit, $offset, $parameters, $sql);
}
}
131 changes: 131 additions & 0 deletions classes/DataWarehouse/Query/Cloud/GroupBys/GroupByUsername.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
<?php

namespace DataWarehouse\Query\Cloud\GroupBys;

/**
 * Group by that adds the system username dimension to a Cloud realm query.
 *
 * The dimension is backed by the modw.systemaccount table (aliased 'sa'); the
 * username column serves as id, short name, long name and ordering key.
 *
 * @author Greg Dean
 * @date 2019-01-27
 */
class GroupByUsername extends \DataWarehouse\Query\Cloud\GroupBy
{
    /**
     * @return string the label of this group by
     */
    public static function getLabel()
    {
        return 'System Username';
    }

    /**
     * @return string a one-sentence description of this group by
     */
    public function getInfo()
    {
        return "The specific system username associated with a running session of a virtual machine.";
    }

    /**
     * Register the group by under the name 'username' together with its
     * possible-values query, then set up the field names and table model.
     */
    public function __construct()
    {
        // The continuation lines of the SQL literal are deliberately left
        // flush-left so the query text itself is unchanged.
        parent::__construct(
            'username',
            array(),
            "select distinct
gt.username as id,
gt.username as short_name,
gt.username as long_name
from systemaccount gt
where 1
order by gt.username",
            array()
        );

        // Every role of the dimension maps onto the username column.
        $this->_order_id_field_name = 'username';
        $this->_long_name_field_name = 'username';
        $this->_short_name_field_name = 'username';
        $this->_id_field_name = 'username';

        $schema = new \DataWarehouse\Query\Model\Schema('modw');
        $this->modw_schema = $schema;
        $this->systemaccount_table = new \DataWarehouse\Query\Model\Table($schema, 'systemaccount', 'sa');
    }

    /**
     * Add the systemaccount table to the query and join it to the data table
     * on sa.id = <data table>.systemaccount_id.
     *
     * @param \DataWarehouse\Query\Query       $query      query being built
     * @param \DataWarehouse\Query\Model\Table $data_table main data table of the query
     *
     * @return \DataWarehouse\Query\Model\TableField the systemaccount 'id' field used in the join
     */
    private function joinSystemAccountTable(\DataWarehouse\Query\Query $query, \DataWarehouse\Query\Model\Table $data_table)
    {
        $query->addTable($this->systemaccount_table);

        $accountIdField = new \DataWarehouse\Query\Model\TableField($this->systemaccount_table, 'id');
        $dataAccountIdField = new \DataWarehouse\Query\Model\TableField($data_table, 'systemaccount_id');

        $joinCondition = new \DataWarehouse\Query\Model\WhereCondition($accountIdField, '=', $dataAccountIdField);
        $query->addWhereCondition($joinCondition);

        return $accountIdField;
    }

    /**
     * Apply this group by to a query: join the systemaccount table, select the
     * order/id/long-name/short-name fields, group on the id field and add the
     * ordering.
     *
     * @param \DataWarehouse\Query\Query       $query       query being built
     * @param \DataWarehouse\Query\Model\Table $data_table  main data table of the query
     * @param bool                             $multi_group forwarded to the column-name getters and addOrder()
     */
    public function applyTo(\DataWarehouse\Query\Query &$query, \DataWarehouse\Query\Model\Table $data_table, $multi_group = false)
    {
        $this->joinSystemAccountTable($query, $data_table);

        $table = $this->systemaccount_table;

        $idField = new \DataWarehouse\Query\Model\TableField($table, $this->_id_field_name, $this->getIdColumnName($multi_group));
        $longNameField = new \DataWarehouse\Query\Model\TableField($table, $this->_long_name_field_name, $this->getLongNameColumnName($multi_group));
        $shortNameField = new \DataWarehouse\Query\Model\TableField($table, $this->_short_name_field_name, $this->getShortNameColumnName($multi_group));
        $orderField = new \DataWarehouse\Query\Model\TableField($table, $this->_order_id_field_name, $this->getOrderIdColumnName($multi_group));

        // Fields are added in the order: order id, id, long name, short name.
        foreach (array($orderField, $idField, $longNameField, $shortNameField) as $selectedField) {
            $query->addField($selectedField);
        }

        $query->addGroup($idField);

        $this->addOrder($query, $multi_group);
    }

    /**
     * Join the systemaccount table to the data table and constrain the query
     * on it.
     *
     * An array constraint is turned into a parenthesised list of single-quoted
     * values; any other constraint is used unchanged.
     *
     * NOTE(review): the constraint is applied to the systemaccount 'id' column
     * while the values are quoted as strings, and the values are not escaped —
     * confirm both against the callers.
     *
     * @param \DataWarehouse\Query\Query       $query           query being built
     * @param \DataWarehouse\Query\Model\Table $data_table      main data table of the query
     * @param bool                             $multi_group     not used by this implementation
     * @param string                           $operation       comparison operator of the constraint
     * @param mixed                            $whereConstraint single value or array of values
     */
    public function addWhereJoin(
        \DataWarehouse\Query\Query &$query,
        \DataWarehouse\Query\Model\Table $data_table,
        $multi_group,
        $operation,
        $whereConstraint
    ) {
        // The join between the main data table and this group by's table.
        $accountIdField = $this->joinSystemAccountTable($query, $data_table);

        // The constraint on the joined table; list values are quoted as strings.
        $constraint = is_array($whereConstraint)
            ? "('". implode("','", $whereConstraint) ."')"
            : $whereConstraint;

        $constraintCondition = new \DataWarehouse\Query\Model\WhereCondition($accountIdField, $operation, $constraint);
        $query->addWhereCondition($constraintCondition);
    } // addWhereJoin

    /**
     * Order the query by the username column of the systemaccount table.
     *
     * @param \DataWarehouse\Query\Query $query       query being built
     * @param bool                       $multi_group not used by this implementation
     * @param string                     $dir         sort direction
     * @param bool                       $prepend     strictly true to prepend the ordering, otherwise it is appended
     */
    public function addOrder(\DataWarehouse\Query\Query &$query, $multi_group = false, $dir = 'asc', $prepend = false)
    {
        $sortField = new \DataWarehouse\Query\Model\TableField($this->systemaccount_table, $this->_order_id_field_name);
        $ordering = new \DataWarehouse\Query\Model\OrderBy($sortField, $dir, $this->getName());

        if ($prepend !== true) {
            $query->addOrder($ordering);
            return;
        }

        $query->prependOrder($ordering);
    }

    /**
     * Delegate to the parent's pullQueryParameters2() with a query that maps
     * the filtered usernames to systemaccount ids.
     *
     * @param mixed $request request data, passed by reference to the parent
     *
     * @return mixed whatever the parent's pullQueryParameters2() returns
     */
    public function pullQueryParameters(&$request)
    {
        $filterSql = 'select id from modw.systemaccount where username in (_filter_)';

        return parent::pullQueryParameters2($request, $filterSql, 'systemaccount_id');
    }

    /**
     * Delegate to the parent's pullQueryParameterDescriptions2() with a query
     * that selects the filtered usernames as field labels.
     *
     * @param mixed $request request data, passed by reference to the parent
     *
     * @return mixed whatever the parent's pullQueryParameterDescriptions2() returns
     */
    public function pullQueryParameterDescriptions(&$request)
    {
        $labelSql = "select distinct username as field_label from modw.systemaccount where username in (_filter_) order by username";

        return parent::pullQueryParameterDescriptions2($request, $labelSql);
    }
}
4 changes: 2 additions & 2 deletions classes/OpenXdmod/DataWarehouseInitializer.php
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ public function ingestCloudDataOpenStack()
if( $this->isRealmEnabled('Cloud') ){
try{
$this->logger->notice('Ingesting OpenStack event log data');
Utilities::runEtlPipeline(array('jobs-cloud-extract-openstack'), $this->logger);
Utilities::runEtlPipeline(array('jobs-cloud-import-users-openstack', 'jobs-cloud-extract-openstack'), $this->logger);
}
catch( Exception $e ){
if( $e->getCode() == 1146 ){
Expand All @@ -230,7 +230,7 @@ public function ingestCloudDataGeneric()
if( $this->isRealmEnabled('Cloud') ){
try{
$this->logger->notice('Ingesting generic cloud log files');
Utilities::runEtlPipeline(array('jobs-cloud-extract-generic'), $this->logger);
Utilities::runEtlPipeline(array('jobs-cloud-import-users-generic', 'jobs-cloud-extract-generic'), $this->logger);
}
catch( Exception $e ){
if( $e->getCode() == 1146 ){
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?php
/**
* @author Greg Dean <[email protected]>
*/

namespace OpenXdmod\Migration\Version800To810;

use CCR\Json;
use OpenXdmod\Migration\ConfigFilesMigration as AbstractConfigFilesMigration;
use Exception;

/**
 * Update config files from version 8.0.0 to 8.1.0.
 *
 * The only change made is to roles.d/cloud.json: every role listed under
 * '+roles' gains Cloud realm query descriptors for the 'person' and
 * 'username' group bys.
 */
class ConfigFilesMigration extends AbstractConfigFilesMigration
{
    /**
     * Path of the cloud roles configuration file (CONFIG_DIR/roles.d/cloud.json).
     *
     * @var string
     */
    private $cloudRolesFilePath;

    /**
     * @param mixed $currentVersion forwarded to the parent constructor
     * @param mixed $newVersion     forwarded to the parent constructor
     */
    public function __construct($currentVersion, $newVersion)
    {
        $this->cloudRolesFilePath = CONFIG_DIR."/roles.d/cloud.json";
        parent::__construct($currentVersion, $newVersion);
    }

    /**
     * Execute the migration.
     *
     * The cloud roles file is only touched when it exists.
     */
    public function execute()
    {
        if (file_exists($this->cloudRolesFilePath)) {
            $this->addCloudRolesGroupBy();
        }
    }

    /**
     * Adds the new 'person' and 'username' group bys to roles.d/cloud.json.
     *
     * A descriptor that is already present for a role is not added a second
     * time, so running the migration again does not create duplicates. The
     * file is only rewritten when at least one descriptor was added.
     *
     * @return bool false when the file could not be read or written, true
     *              otherwise (including when there was nothing to change)
     */
    public function addCloudRolesGroupBy()
    {
        // Json::loadFile throws an exception if the file is completely empty or if there is some
        // other problem loading the file. Catch and log it so the rest of the migration script
        // can continue to run.
        try {
            $cloudRolesFile = Json::loadFile($this->cloudRolesFilePath);
        } catch (Exception $e) {
            $this->logger->notice("Unable to read roles.d/cloud.json config file. Continuing upgrade");
            return false;
        }

        // Nothing to update unless the file holds a '+roles' map.
        if (!is_array($cloudRolesFile)
            || !isset($cloudRolesFile['+roles'])
            || !is_array($cloudRolesFile['+roles'])
        ) {
            return true;
        }

        $modified = false;

        foreach (array_keys($cloudRolesFile['+roles']) as $role) {
            foreach (array('person', 'username') as $groupBy) {
                $existing = array();
                if (isset($cloudRolesFile['+roles'][$role]['+query_descripters'])
                    && is_array($cloudRolesFile['+roles'][$role]['+query_descripters'])
                ) {
                    $existing = $cloudRolesFile['+roles'][$role]['+query_descripters'];
                }

                // Skip descriptors that a previous run (or the admin) already added.
                if ($this->containsCloudGroupBy($existing, $groupBy)) {
                    continue;
                }

                $cloudRolesFile['+roles'][$role]['+query_descripters'][] = array('realm' => 'Cloud', 'group_by' => $groupBy);
                $modified = true;
            }
        }

        if (!$modified) {
            return true;
        }

        // An exception can be thrown if there is a problem writing the file. Catch and log the issue
        // while letting the rest of the migration script run
        try {
            Json::saveFile($this->cloudRolesFilePath, $cloudRolesFile);
        } catch (Exception $e) {
            $this->logger->notice("Unable to write to roles.d/cloud.json config file. Continuing upgrade");
            return false;
        }

        return true;
    }

    /**
     * Tell whether a list of query descriptors already holds the Cloud realm
     * descriptor for the given group by.
     *
     * @param array  $descripters list of query descriptor entries
     * @param string $groupBy     group by name to look for
     *
     * @return bool true when a matching descriptor is present
     */
    private function containsCloudGroupBy(array $descripters, $groupBy)
    {
        foreach ($descripters as $descripter) {
            if (is_array($descripter)
                && isset($descripter['realm'], $descripter['group_by'])
                && $descripter['realm'] === 'Cloud'
                && $descripter['group_by'] === $groupBy
            ) {
                return true;
            }
        }

        return false;
    }
}
8 changes: 8 additions & 0 deletions configuration/datawarehouse.json
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,14 @@
{
"name": "submission_venue",
"class": "GroupBySubmissionVenue"
},
{
"name": "person",
"class": "GroupByPerson"
},
{
"name": "username",
"class": "GroupByUsername"
}
],
"statistics": [
Expand Down
Loading

0 comments on commit 4f28be4

Please sign in to comment.