Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability to create one-to-one 'join_on_key'-type links to the GUI link editor #2313

Merged
merged 14 commits into from
Oct 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Full changelog
v1.6.0 (unreleased)
-------------------

* Added one-to-one ``join_on_key``-type links to the link manager, allowing
them to be created and deleted through the UI. This option is available
under 'Create advanced link > Join > Join on ID'. [#2215]

* Modify histogram viewer to not prepend x-axis label with 'Log' when using a log scale x-axis. [#2325]

* Modify scatter viewer to not prepend axis labels with 'Log' when using log scale axes. [#2323]
Expand Down
57 changes: 55 additions & 2 deletions glue/core/link_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@

import types

from glue.config import link_function
from glue.config import link_function, link_helper

from glue.core.data import ComponentID
from glue.core.component_link import ComponentLink

from inspect import getfullargspec


__all__ = ['LinkCollection', 'LinkSame', 'LinkTwoWay', 'MultiLink',
'LinkAligned', 'BaseMultiLink', 'ManualLinkCollection']
'LinkAligned', 'BaseMultiLink', 'ManualLinkCollection',
'JoinLink']


@link_function("Link conceptually identical components",
Expand Down Expand Up @@ -463,3 +465,54 @@ def __init__(self, data1=None, data2=None,
FunctionalLinkCollection.description = description or ''

return FunctionalLinkCollection


@link_helper(category="Join")
class JoinLink(LinkCollection):
    """
    Link helper describing a join between two datasets on one identifier each.

    The instance records the two datasets and the identifier component chosen
    in each of them, and starts out with an empty list of component links.
    Two ``JoinLink`` objects compare equal when they refer to the same pair
    of datasets and identifiers, in either order.
    """

    cid_independent = False

    display = "Join on ID"
    description = (
        "Join two datasets on a common ID. Other links "
        "in glue connect data columns (two datasets have 'age' columns but "
        "the rows are different objects), while Join on ID connects the same "
        "rows/items across two datasets."
    )

    labels1 = ["Identifier in dataset 1"]
    labels2 = ["Identifier in dataset 2"]

    def __init__(self, *args, cids1=None, cids2=None, data1=None, data2=None):
        # Exactly one identifier per dataset is accepted for the time being,
        # even though the underlying value-based linking can handle several.
        for identifiers in (cids1, cids2):
            assert len(identifiers) == 1

        self.data1, self.data2 = data1, data2
        self.cids1, self.cids2 = cids1, cids2

        self._links = []

    def __str__(self):
        # '><' stands in for the symbol used for a database join.
        return '{} >< {}'.format(self.cids1, self.cids2)

    def __repr__(self):
        return "<JoinLink: {}>".format(str(self))

    # Equality and inequality are defined explicitly so that links of this
    # kind can be located and removed by the link manager.
    def __eq__(self, other):
        if not isinstance(other, JoinLink):
            return False

        # Same datasets and identifiers, given in the same order.
        direct = ((self.data1 == other.data1) and
                  (self.data2 == other.data2) and
                  (self.cids1 == other.cids1) and
                  (self.cids2 == other.cids2))
        if direct:
            return direct

        # Same join with the two sides swapped.
        return ((self.data1 == other.data2) and
                (self.data2 == other.data1) and
                (self.cids1 == other.cids2) and
                (self.cids2 == other.cids1))

    def __ne__(self, other):
        return not self.__eq__(other)
15 changes: 14 additions & 1 deletion glue/core/link_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from glue.core.hub import HubListener
from glue.core.message import DataCollectionDeleteMessage, DataRemoveComponentMessage
from glue.core.contracts import contract
from glue.core.link_helpers import LinkCollection
from glue.core.link_helpers import LinkCollection, JoinLink
from glue.core.component_link import ComponentLink
from glue.core.data import Data, BaseCartesianData
from glue.core.component import DerivedComponent
Expand Down Expand Up @@ -184,6 +184,8 @@ def add_link(self, link, update_external=True):
self.update_externally_derivable_components()
else:
if link not in self._external_links and isinstance(link, LinkCollection) or link.inverse not in self._external_links:
if isinstance(link, JoinLink):
link.data1.join_on_key(link.data2, link.cids1[0], link.cids2[0])
self._external_links.append(link)
if update_external:
self.update_externally_derivable_components()
Expand All @@ -197,6 +199,17 @@ def remove_link(self, link, update_external=True):
self.update_externally_derivable_components()
else:
logging.getLogger(__name__).debug('removing link %s', link)
if isinstance(link, JoinLink):
data_to_remove_from_data1 = None
data_to_remove_from_data2 = None
for other_data, key_join in link.data1._key_joins.items():
cid, cid_other = key_join
if (other_data == link.data2):
if (cid[0] == link.cids1[0]) and (cid_other[0] == link.cids2[0]): # assumes single-linkage
data_to_remove_from_data1 = other_data
data_to_remove_from_data2 = link.data1
link.data1._key_joins.pop(data_to_remove_from_data1) # Assume these joins are set up right
link.data2._key_joins.pop(data_to_remove_from_data2)
self._external_links.remove(link)
if update_external:
self.update_externally_derivable_components()
Expand Down
122 changes: 122 additions & 0 deletions glue/core/tests/test_join_on_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from glue.core import Data, DataCollection
from glue.core.exceptions import IncompatibleAttribute
from glue.core.link_helpers import JoinLink

from numpy.testing import assert_array_equal
import pytest


def test_remove_and_add_again():
    """A join that was removed can be created again and works as before."""
    first = Data(x=[1, 2, 3, 4, 5], k1=[0, 0, 1, 1, 2], label='d1')
    second = Data(y=[2, 4, 5, 8, 4], k2=[1, 3, 1, 2, 3], label='d2')
    collection = DataCollection([first, second])

    join = JoinLink(cids1=[first.id['k1']], cids2=[second.id['k2']],
                    data1=first, data2=second)
    collection.add_link(join)
    collection.remove_link(join)

    # A state defined on d1's own attribute still evaluates on d1.
    subset = first.new_subset()
    subset.subset_state = first.id['x'] > 2
    assert_array_equal(subset.to_mask(), [False, False, True, True, True])

    # After the removal, d2 is expected to reject a state defined on d1.
    # Only the call that should raise is placed inside the context manager,
    # so no unreachable comparison is left in the test.
    subset = second.new_subset()
    subset.subset_state = first.id['x'] > 2
    with pytest.raises(IncompatibleAttribute):
        subset.to_mask()

    # Adding an equivalent join again makes the state usable on d2.
    join = JoinLink(cids1=[first.id['k1']], cids2=[second.id['k2']],
                    data1=first, data2=second)
    collection.add_link(join)
    subset = second.new_subset()
    subset.subset_state = first.id['x'] > 2
    assert_array_equal(subset.to_mask(), [True, False, True, True, False])


def test_remove_is_clean():
    """Once a join is removed, d2 can no longer evaluate a state defined on d1."""
    first = Data(x=[1, 2, 3, 4, 5], k1=[0, 0, 1, 1, 2], label='d1')
    second = Data(y=[2, 4, 5, 8, 4], k2=[1, 3, 1, 2, 3], label='d2')
    collection = DataCollection([first, second])

    join = JoinLink(cids1=[first.id['k1']], cids2=[second.id['k2']],
                    data1=first, data2=second)
    collection.add_link(join)
    collection.remove_link(join)

    # A state defined on d1's own attribute still evaluates on d1.
    subset = first.new_subset()
    subset.subset_state = first.id['x'] > 2
    assert_array_equal(subset.to_mask(), [False, False, True, True, True])

    # Only the call that should raise is placed inside the context manager,
    # so no unreachable comparison is left in the test.
    subset = second.new_subset()
    subset.subset_state = first.id['x'] > 2
    with pytest.raises(IncompatibleAttribute):
        subset.to_mask()


def test_remove():
    """Adding and then removing a join restores the initial bookkeeping."""
    first = Data(x=[1, 2, 3, 4, 5], k1=[0, 0, 1, 1, 2], label='d1')
    second = Data(y=[2, 4, 5, 8, 4], k2=[1, 3, 1, 2, 3], label='d2')
    collection = DataCollection([first, second])

    def assert_no_join():
        # No external link, no component link, and no key join on either side.
        assert len(collection._link_manager._external_links) == 0
        assert len(collection.links) == 0
        assert first._key_joins == {}
        assert second._key_joins == {}

    assert_no_join()

    join = JoinLink(cids1=[first.id['k1']], cids2=[second.id['k2']],
                    data1=first, data2=second)
    collection.add_link(join)

    # The link manager records the join as an external link ...
    assert len(collection._link_manager._external_links) == 1
    # ... while the collection's list of component links stays empty.
    assert len(collection.links) == 0

    collection.remove_link(join)

    assert_no_join()


def test_using_link_index():
    """A join added through the data collection lets d2 use a state defined on d1."""
    first = Data(x=[1, 2, 3, 4, 5], k1=[0, 0, 1, 1, 2], label='d1')
    second = Data(y=[2, 4, 5, 8, 4], k2=[1, 3, 1, 2, 3], label='d2')
    collection = DataCollection([first, second])

    # Nothing is registered before the join is added.
    assert len(collection._link_manager._external_links) == 0
    assert len(collection.links) == 0

    join = JoinLink(cids1=[first.id['k1']], cids2=[second.id['k2']],
                    data1=first, data2=second)
    collection.add_link(join)

    # The join shows up as an external link only, not as a component link.
    assert len(collection.links) == 0
    assert len(collection._link_manager._external_links) == 1

    own_subset = first.new_subset()
    own_subset.subset_state = first.id['x'] > 2
    assert_array_equal(own_subset.to_mask(), [False, False, True, True, True])

    joined_subset = second.new_subset()
    joined_subset.subset_state = first.id['x'] > 2
    assert_array_equal(joined_subset.to_mask(), [True, False, True, True, False])


def test_basic_join_on_key():
    """Calling ``join_on_key`` directly lets d2 use a state defined on d1."""
    first = Data(x=[1, 2, 3, 4, 5], k1=[0, 0, 1, 1, 2], label='d1')
    second = Data(y=[2, 4, 5, 8, 4], k2=[1, 3, 1, 2, 3], label='d2')

    # The join is set up on the data objects themselves; no DataCollection
    # or JoinLink is involved in this test.
    second.join_on_key(first, 'k2', 'k1')

    own_subset = first.new_subset()
    own_subset.subset_state = first.id['x'] > 2
    assert_array_equal(own_subset.to_mask(), [False, False, True, True, True])

    joined_subset = second.new_subset()
    joined_subset.subset_state = first.id['x'] > 2
    assert_array_equal(joined_subset.to_mask(), [True, False, True, True, False])


def test_setup_and_eq_logic():
    """Check JoinLink attributes, its string forms, and order-insensitive equality."""
    d1 = Data(x=[1, 2, 3, 4, 5], k1=[0, 0, 1, 1, 2], label='d1')
    d2 = Data(y=[2, 4, 5, 8, 4], k2=[1, 3, 1, 2, 3], label='d2')
    d3 = Data(y=[2, 4, 5, 8, 4], k2=[1, 3, 1, 2, 3], label='d3')

    # The collection is built but not used further in this test.
    collection = DataCollection([d1, d2, d3])

    forward = JoinLink(cids1=[d1.id['k1']], cids2=[d2.id['k2']], data1=d1, data2=d2)
    swapped = JoinLink(cids1=[d2.id['k2']], cids2=[d1.id['k1']], data1=d2, data2=d1)

    # The constructor stores its arguments unchanged.
    assert forward.data1 == d1
    assert forward.data2 == d2
    assert forward.cids1[0] == d1.id['k1']
    assert forward.cids2[0] == d2.id['k2']

    # String forms are built from the two identifier lists.
    assert str(forward) == '[k1] >< [k2]'
    assert repr(forward) == '<JoinLink: [k1] >< [k2]>'

    # A link equals itself and its mirror image ...
    assert forward == forward
    assert forward == swapped

    # ... but not a join that involves a different dataset.
    other = JoinLink(cids1=[d3.id['k2']], cids2=[d1.id['k1']], data1=d3, data2=d1)
    assert other != swapped
    assert other != forward
26 changes: 15 additions & 11 deletions glue/dialogs/link_editor/qt/data_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,21 @@
COLOR_DISCONNECTED = (0.9, 0.6, 0.6)


def get_pen(color, linewidth=1):
def get_pen(color, linewidth=1, linestyle=Qt.SolidLine):
color = mpl_to_qt_color(color)
return QPen(color, linewidth, Qt.SolidLine, Qt.RoundCap, Qt.RoundJoin)
return QPen(color, linewidth, linestyle, Qt.RoundCap, Qt.RoundJoin)


class Edge(QGraphicsLineItem):

def __init__(self, node_source, node_dest, linewidth=3, zindex=5):
def __init__(self, node_source, node_dest, linewidth=3, zindex=5, link_type="value"):
self.linewidth = linewidth
self.node_source = node_source
self.node_dest = node_dest
if link_type == 'join':
self.linestyle = Qt.DashLine
else:
self.linestyle = Qt.SolidLine
super(Edge, self).__init__(0, 0, 1, 1)
self.setZValue(zindex)
self.color = '0.5'
Expand All @@ -40,7 +44,7 @@ def color(self):

@color.setter
def color(self, value):
self.setPen(get_pen(value, self.linewidth))
self.setPen(get_pen(value, self.linewidth, self.linestyle))

def add_to_scene(self, scene):
scene.addItem(self)
Expand Down Expand Up @@ -163,7 +167,7 @@ def get_connections(dc_links):
data1 = link.data1
data2 = link.data2
if (data1, data2) not in links and (data2, data1) not in links:
links.append((data1, data2))
links.append((data1, data2, link.link_type))

return links

Expand Down Expand Up @@ -276,14 +280,14 @@ def set_data_collection(self, data_collection, old_links=None, new_links=None):
# Get links and set up edges

if old_links:
self.background_edges = [Edge(self.data_to_nodes[data1], self.data_to_nodes[data2], linewidth=1, zindex=1)
for data1, data2 in get_connections(data_collection.external_links)]
self.background_edges = [Edge(self.data_to_nodes[data1], self.data_to_nodes[data2], linewidth=1, zindex=1, link_type=link_type)
for data1, data2, link_type in get_connections(data_collection.external_links)]
else:
self.background_edges = []

if new_links:
self.edges = [Edge(self.data_to_nodes[data1], self.data_to_nodes[data2])
for data1, data2 in get_connections(new_links)]
self.edges = [Edge(self.data_to_nodes[data1], self.data_to_nodes[data2], link_type=link_type)
for data1, data2, link_type in get_connections(new_links)]
else:
self.edges = []

Expand All @@ -309,8 +313,8 @@ def set_links(self, links):
for edge in self.edges:
edge.remove_from_scene(self.scene)

self.edges = [Edge(self.data_to_nodes[data1], self.data_to_nodes[data2])
for data1, data2 in get_connections(links)]
self.edges = [Edge(self.data_to_nodes[data1], self.data_to_nodes[data2], link_type=link_type)
for data1, data2, link_type in get_connections(links)]

for edge in self.edges:
edge.update_position()
Expand Down
54 changes: 54 additions & 0 deletions glue/dialogs/link_editor/qt/tests/test_link_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,3 +669,57 @@ def test_preexisting_links_twodata(self):
dialog.show()

dialog.accept()


class TestLinkEditorForJoins:
    """Create a 'Join on ID' link through the link editor dialog, then remove it."""

    def setup_method(self, method):
        # Two small datasets with string-valued columns (x and a).
        self.data1 = Data(x=['101', '102', '105'], y=[2, 3, 4], z=[6, 5, 4], label='data1')
        self.data2 = Data(a=['102', '104', '105'], b=[4, 5, 4], c=[3, 4, 1], label='data2')

        self.data_collection = DataCollection([self.data1, self.data2])

    def test_make_and_delete_link(self):
        # --- Create the join through the dialog -------------------------
        dialog = LinkEditor(self.data_collection)
        dialog.show()

        widget = dialog.link_widget
        widget.state.data1 = self.data1
        widget.state.data2 = self.data2

        join_action = get_action(widget, 'Join on ID')
        join_action.trigger()

        # Let pending events run before accepting; per the original author's
        # note, the key joins are only applied when the dialog is accepted.
        process_events()
        dialog.accept()

        # The join is tracked as an external link, not as a component link,
        # and both datasets now carry key-join information.
        assert len(self.data_collection.links) == 0
        assert len(self.data_collection._link_manager._external_links) == 1

        assert self.data1._key_joins != {}
        assert self.data2._key_joins != {}

        # --- Re-open the dialog and delete the join ---------------------
        dialog.show()
        widget = dialog.link_widget

        # With only two datasets the single existing link is the current one.
        assert widget.listsel_current_link.count() == 1
        assert widget.link_details.text().startswith('Join two datasets')

        current = widget.state.current_link
        current.data1 = self.data1
        current.data2 = self.data2

        # NOTE(review): the link type is set by hand here; it is unclear why
        # the editor state does not already carry it - confirm.
        widget.state.current_link.link_type = 'join'

        assert widget.state.current_link.link in self.data_collection._link_manager._external_links
        assert widget.button_remove_link.isEnabled()

        widget.button_remove_link.click()
        process_events()

        dialog.accept()

        # Everything the join added has been cleaned up again.
        assert len(self.data_collection.links) == 0
        assert len(self.data_collection._link_manager._external_links) == 0
        assert self.data1._key_joins == {}
        assert self.data2._key_joins == {}
Loading