diff --git a/dataset/src/test/scala/frameless/JoinTests.scala b/dataset/src/test/scala/frameless/JoinTests.scala index 0eda70031..91b0169a9 100644 --- a/dataset/src/test/scala/frameless/JoinTests.scala +++ b/dataset/src/test/scala/frameless/JoinTests.scala @@ -82,7 +82,7 @@ class JoinTests extends TypedDatasetSuite { check(forAll(prop[Int, Long, String]((leftDs, rightDs) => leftDs .join(rightDs).full(leftDs.col('a) === rightDs.col('a))) _)) check(forAll(prop[Int, Long, String]((leftDs, rightDs) => leftDs - .join(rightDs).full(_.col('a) === rightDs.col('a))) _)) + .join(rightDs).full(_('a) === rightDs.col('a))) _)) check(forAll(prop[Int, Long, String]((leftDs, rightDs) => leftDs .join(rightDs).full(_.col('a) === _.col('a))) _)) } diff --git a/docs/FeatureOverview.md b/docs/FeatureOverview.md index 66504a129..7a0882b21 100644 --- a/docs/FeatureOverview.md +++ b/docs/FeatureOverview.md @@ -646,6 +646,30 @@ withCityInfo.select( ).as[AptPriceCity].show().run ``` +### Chained Joins + +Joins may be chained using `ChainedJoinOps`: + +```scala mdoc +import frameless.syntax.ChainedJoinSyntax + +val withBedroomInfo = aptTypedDs.join(citiInfoTypedDS).inner { aptTypedDs('city) === citiInfoTypedDS('name) } + .join(bedroomStats).left { currentDs => currentDs.col('_1).field('city) === bedroomStats('city)} + +withBedroomInfo.show().run() +``` + +You may also use a version that provides the joined dataset as an additional parameter to the condition function. + +```scala mdoc +import frameless.syntax.ChainedJoinSyntax + +val withBedroomInfoBothSidesAsParameters = aptTypedDs.join(citiInfoTypedDS).inner { aptTypedDs('city) === citiInfoTypedDS('name) } + .join(bedroomStats).left { (currentDs, joinedDs) => currentDs('_1).field('city) === joinedDs('city)} + +withBedroomInfoBothSidesAsParameters.show().run() +``` + ```scala mdoc:invisible spark.stop() ```