@@ -14,6 +14,37 @@ using namespace NYql::NNodes;
14
14
15
15
namespace {
16
16
17
// Builds an "ExpandMap" callable over `arg`: child 0 is the input node (moved in),
// child 1 is a one-parameter lambda ("item") whose outputs are produced in the .Do() body below.
//
// Parameters:
//   type           - struct type whose items (type.GetItems()) determine the first group of outputs.
//   arg            - input expression; consumed via std::move. The builder is positioned at arg->Pos().
//   ctx            - expression context used to create the builder and passed on to ExpandType.
//   convertedItems - (name, target type) pairs; each one appends an extra cast output after the
//                    plain member outputs. Only read in this function.
//   position       - position handle forwarded to ExpandType for the target-type node.
//
// Returns the built expression node (result of .Build()).
//
// NOTE(review): the leading space inside the string literals (" ExpandMap", " item", ...) looks like
// a rendering artifact of this diff view — confirm against the real file.
+ TExprNode::TPtr ExpandJoinInput (const TStructExprType& type, TExprNode::TPtr&& arg, TExprContext& ctx, std::vector<std::pair<TString, const TTypeAnnotationNode*>>& convertedItems, TPositionHandle position) {
18
+ return ctx.Builder (arg->Pos ())
19
+ .Callable (" ExpandMap" )
20
+ .Add (0 , std::move (arg))
21
+ .Lambda (1 )
22
+ .Param (" item" )
23
+ .Do ([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
24
// `i` is the output index inside the lambda; it is shared by both loops so the
// cast outputs are appended right after the member outputs.
+ auto i = 0U ;
25
// Group 1: one Member(item, <field name>) output per item of `type`, in GetItems() order.
+ for (const auto & item : type.GetItems ()) {
26
+ parent.Callable (i, " Member" )
27
+ .Arg (0 , " item" )
28
+ .Atom (1 , item->GetName ())
29
+ .Seal ();
30
+ i++;
31
+ }
32
// Group 2: one StrictCast(Member(item, <name>), <type node>) output per entry of convertedItems.
// The type node comes from ExpandType(position, type, ctx), which is defined outside this view.
+ for (const auto & convertedItem : convertedItems) {
33
+ parent.Callable (i, " StrictCast" )
34
+ .Callable (0 , " Member" )
35
+ .Arg (0 , " item" )
36
+ .Atom (1 , convertedItem.first )
37
+ .Seal ()
38
+ .Add (1 , ExpandType (position, *convertedItem.second , ctx))
39
+ .Seal ();
40
+ i++;
41
+ }
42
+ return parent;
43
+ })
44
// First Seal() closes the lambda, the second closes the ExpandMap callable.
+ .Seal ()
45
+ .Seal ().Build ();
46
+ }
47
+
17
48
struct TJoinInputDesc {
18
49
TJoinInputDesc (TMaybe<THashSet<TStringBuf>> labels, const TExprBase& input,
19
50
TSet<std::pair<TStringBuf, TStringBuf>>&& keys)
@@ -1151,25 +1182,7 @@ TExprBase DqBuildJoinDict(const TDqJoin& join, TExprContext& ctx) {
1151
1182
1152
1183
namespace {
1153
1184
1154
// Removed by this diff: the earlier three-parameter ExpandJoinInput.
// It builds an "ExpandMap" callable over `arg` (moved in as child 0) with a one-parameter
// lambda ("item") that emits one Member(item, <field name>) output per item of `type`,
// in GetItems() order, and returns the built node. It has no cast outputs.
- TExprNode::TPtr ExpandJoinInput (const TStructExprType& type, TExprNode::TPtr&& arg, TExprContext& ctx) {
1155
- return ctx.Builder (arg->Pos ())
1156
- .Callable (" ExpandMap" )
1157
- .Add (0 , std::move (arg))
1158
- .Lambda (1 )
1159
- .Param (" item" )
1160
- .Do ([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
1161
- auto i = 0U ;
1162
- for (const auto & item : type.GetItems ()) {
1163
// Output index is advanced inline via i++ in the Callable() call.
- parent.Callable (i++, " Member" )
1164
- .Arg (0 , " item" )
1165
- .Atom (1 , item->GetName ())
1166
- .Seal ();
1167
- }
1168
- return parent;
1169
- })
1170
- .Seal ()
1171
- .Seal ().Build ();
1172
- }
1185
+
1173
1186
1174
1187
TExprNode::TPtr SqueezeJoinInputToDict (TExprNode::TPtr&& input, size_t width, const std::vector<ui32>& keys, bool withPayloads, bool multiRow, TExprContext& ctx) {
1175
1188
YQL_ENSURE (width > 0U && !keys.empty ());
@@ -1317,7 +1330,7 @@ TExprNode::TPtr ReplaceJoinOnSide(TExprNode::TPtr&& input, const TTypeAnnotation
1317
1330
1318
1331
}
1319
1332
1320
- TExprBase DqBuildHashJoin (const TDqJoin& join, EHashJoinMode mode, TExprContext& ctx, IOptimizationContext& optCtx, bool shuffleElimination, bool shuffleEliminationWithMap, bool useBlockHashJoin ) {
1333
+ TExprBase DqBuildHashJoin (const TDqJoin& join, EHashJoinMode mode, TExprContext& ctx, IOptimizationContext& optCtx, bool shuffleElimination, bool shuffleEliminationWithMap, bool ) {
1321
1334
const auto joinType = join.JoinType ().Value ();
1322
1335
YQL_ENSURE (joinType != " Cross" sv);
1323
1336
@@ -1558,8 +1571,10 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext&
1558
1571
TCoArgument leftInputArg{ctx.NewArgument (join.LeftInput ().Pos (), " _dq_join_left" )};
1559
1572
TCoArgument rightInputArg{ctx.NewArgument (join.RightInput ().Pos (), " _dq_join_right" )};
1560
1573
1561
- auto leftWideFlow = ExpandJoinInput (*leftStructType, leftInputArg.Ptr (), ctx);
1562
- auto rightWideFlow = ExpandJoinInput (*rightStructType, rightInputArg.Ptr (), ctx);
1574
+ // For standard Grace join we don't need type conversions
1575
+ std::vector<std::pair<TString, const TTypeAnnotationNode*>> emptyConversions;
1576
+ auto leftWideFlow = ExpandJoinInput (*leftStructType, leftInputArg.Ptr (), ctx, emptyConversions, join.Pos ());
1577
+ auto rightWideFlow = ExpandJoinInput (*rightStructType, rightInputArg.Ptr (), ctx, emptyConversions, join.Pos ());
1563
1578
1564
1579
const auto leftFullWidth = leftNames.size ();
1565
1580
const auto rightFullWidth = rightNames.size ();
@@ -1580,22 +1595,6 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext&
1580
1595
switch (mode) {
1581
1596
case EHashJoinMode::GraceAndSelf:
1582
1597
case EHashJoinMode::Grace:
1583
- if (useBlockHashJoin) {
1584
- // Create TDqPhyBlockHashJoin node with structured inputs - peephole will handle conversion
1585
- // Pass the original structured inputs, not wide flows
1586
- hashJoin = Build<TDqPhyBlockHashJoin>(ctx, join.Pos ())
1587
- .LeftInput (leftInputArg)
1588
- .RightInput (rightInputArg)
1589
- .LeftLabel (join.LeftLabel ())
1590
- .RightLabel (join.RightLabel ())
1591
- .JoinType (join.JoinType ())
1592
- .JoinKeys (join.JoinKeys ())
1593
- .LeftJoinKeyNames (join.LeftJoinKeyNames ())
1594
- .RightJoinKeyNames (join.RightJoinKeyNames ())
1595
- .Done ().Ptr ();
1596
- break ;
1597
- }
1598
-
1599
1598
hashJoin = ctx.Builder (join.Pos ())
1600
1599
.Callable (callableName)
1601
1600
.Do ([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
@@ -1727,7 +1726,7 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext&
1727
1726
.Do ([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
1728
1727
for (ui32 i = 0U ; i < rightNames.size (); ++i) {
1729
1728
parent.Atom (2 *i, ctx.GetIndexAsString (i), TNodeFlags::Default);
1730
- parent.Atom (2 *i + 1 , ctx.GetIndexAsString (i + leftNames. size () ), TNodeFlags::Default);
1729
+ parent.Atom (2 *i + 1 , ctx.GetIndexAsString (i), TNodeFlags::Default);
1731
1730
}
1732
1731
return parent;
1733
1732
})
@@ -1827,46 +1826,46 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext&
1827
1826
ythrow yexception () << " Invalid hash join mode: " << mode;
1828
1827
}
1829
1828
1830
- if (!useBlockHashJoin) {
1831
- std::vector<TString> fullColNames;
1832
- for (const auto & v: leftNames) {
1833
- if (leftTableName.empty ()) {
1834
- fullColNames.emplace_back (v.first );
1835
- } else {
1836
- fullColNames.emplace_back (FullColumnName (leftTableName, v.first ));
1837
- }
1829
+ // Collect the output column names: left columns first, then right; each is qualified with its table name when that name is non-empty
1830
+ std::vector<TString> fullColNames;
1831
+ for (const auto & v: leftNames) {
1832
+ if (leftTableName.empty ()) {
1833
+ fullColNames.emplace_back (v.first );
1834
+ } else {
1835
+ fullColNames.emplace_back (FullColumnName (leftTableName, v.first ));
1838
1836
}
1837
+ }
1839
1838
1840
- for (const auto & v: rightNames ) {
1841
- if (rightTableName.empty ()) {
1842
- fullColNames.emplace_back (v.first );
1843
- } else {
1844
- fullColNames.emplace_back (FullColumnName (rightTableName, v.first ));
1845
- }
1839
+ for (const auto & v: rightNames ) {
1840
+ if (rightTableName.empty ()) {
1841
+ fullColNames.emplace_back (v.first );
1842
+ } else {
1843
+ fullColNames.emplace_back (FullColumnName (rightTableName, v.first ));
1846
1844
}
1845
+ }
1847
1846
1848
- hashJoin = ctx. Builder ( join. Pos ())
1849
- . Callable ( " NarrowMap " )
1850
- . Add ( 0 , std::move (hashJoin) )
1851
- . Lambda ( 1 )
1852
- . Params ( " output " , fullColNames. size () )
1853
- . Callable ( " AsStruct " )
1854
- . Do ([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
1855
- ui32 i = 0U ;
1856
- for ( const auto & colName : fullColNames) {
1857
- parent. List (i)
1858
- . Atom ( 0 , colName )
1859
- . Arg ( 1 , " output " , i )
1860
- .Seal ();
1861
- i++ ;
1862
- }
1863
- return parent;
1864
- })
1865
- . Seal ( )
1847
+ // Apply NarrowMap to convert wide output to structured output for all join types
1848
+ hashJoin = ctx. Builder (join. Pos () )
1849
+ . Callable ( " NarrowMap " )
1850
+ . Add ( 0 , std::move (hashJoin) )
1851
+ . Lambda ( 1 )
1852
+ . Params ( " output " , fullColNames. size () )
1853
+ . Callable ( " AsStruct " )
1854
+ . Do ([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
1855
+ ui32 i = 0U ;
1856
+ for ( const auto & colName : fullColNames) {
1857
+ parent. List (i )
1858
+ . Atom ( 0 , colName )
1859
+ .Arg ( 1 , " output " , i)
1860
+ . Seal () ;
1861
+ i++;
1862
+ }
1863
+ return parent;
1864
+ } )
1866
1865
.Seal ()
1867
1866
.Seal ()
1868
- . Build ();
1869
- }
1867
+ . Seal ()
1868
+ . Build ();
1870
1869
1871
1870
// this func add join to the stage and add connection to it. we do this instead of map connection to reduce data network interacting
1872
1871
auto addJoinToStage =
@@ -1961,4 +1960,7 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext&
1961
1960
.Done ();
1962
1961
}
1963
1962
1963
+
1964
+
1964
1965
} // namespace NYql::NDq
1966
+
0 commit comments