@@ -1710,6 +1710,40 @@ impl DataFrame {
1710
1710
} )
1711
1711
}
1712
1712
1713
+ /// Calculate the distinct intersection of two [`DataFrame`]s. The two [`DataFrame`]s must have exactly the same schema
1714
+ ///
1715
+ /// ```
1716
+ /// # use datafusion::prelude::*;
1717
+ /// # use datafusion::error::Result;
1718
+ /// # use datafusion_common::assert_batches_sorted_eq;
1719
+ /// # #[tokio::main]
1720
+ /// # async fn main() -> Result<()> {
1721
+ /// let ctx = SessionContext::new();
1722
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
1723
+ /// let d2 = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?;
1724
+ /// let df = df.intersect_distinct(d2)?;
1725
+ /// let expected = vec![
1726
+ /// "+---+---+---+",
1727
+ /// "| a | b | c |",
1728
+ /// "+---+---+---+",
1729
+ /// "| 1 | 2 | 3 |",
1730
+ /// "+---+---+---+"
1731
+ /// ];
1732
+ /// # assert_batches_sorted_eq!(expected, &df.collect().await?);
1733
+ /// # Ok(())
1734
+ /// # }
1735
+ /// ```
1736
+ pub fn intersect_distinct ( self , dataframe : DataFrame ) -> Result < DataFrame > {
1737
+ let left_plan = self . plan ;
1738
+ let right_plan = dataframe. plan ;
1739
+ let plan = LogicalPlanBuilder :: intersect ( left_plan, right_plan, false ) ?;
1740
+ Ok ( DataFrame {
1741
+ session_state : self . session_state ,
1742
+ plan,
1743
+ projection_requires_validation : true ,
1744
+ } )
1745
+ }
1746
+
1713
1747
/// Calculate the exception of two [`DataFrame`]s. The two [`DataFrame`]s must have exactly the same schema
1714
1748
///
1715
1749
/// ```
@@ -1746,6 +1780,42 @@ impl DataFrame {
1746
1780
} )
1747
1781
}
1748
1782
1783
+ /// Calculate the distinct exception of two [`DataFrame`]s. The two [`DataFrame`]s must have exactly the same schema
1784
+ ///
1785
+ /// ```
1786
+ /// # use datafusion::prelude::*;
1787
+ /// # use datafusion::error::Result;
1788
+ /// # use datafusion_common::assert_batches_sorted_eq;
1789
+ /// # #[tokio::main]
1790
+ /// # async fn main() -> Result<()> {
1791
+ /// let ctx = SessionContext::new();
1792
+ /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?;
1793
+ /// let d2 = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
1794
+ /// let result = df.except_distinct(d2)?;
1795
+ /// // those columns are not in example.csv, but in example_long.csv
1796
+ /// let expected = vec![
1797
+ /// "+---+---+---+",
1798
+ /// "| a | b | c |",
1799
+ /// "+---+---+---+",
1800
+ /// "| 4 | 5 | 6 |",
1801
+ /// "| 7 | 8 | 9 |",
1802
+ /// "+---+---+---+"
1803
+ /// ];
1804
+ /// # assert_batches_sorted_eq!(expected, &result.collect().await?);
1805
+ /// # Ok(())
1806
+ /// # }
1807
+ /// ```
1808
+ pub fn except_distinct ( self , dataframe : DataFrame ) -> Result < DataFrame > {
1809
+ let left_plan = self . plan ;
1810
+ let right_plan = dataframe. plan ;
1811
+ let plan = LogicalPlanBuilder :: except ( left_plan, right_plan, false ) ?;
1812
+ Ok ( DataFrame {
1813
+ session_state : self . session_state ,
1814
+ plan,
1815
+ projection_requires_validation : true ,
1816
+ } )
1817
+ }
1818
+
1749
1819
/// Execute this `DataFrame` and write the results to `table_name`.
1750
1820
///
1751
1821
/// Returns a single [RecordBatch] containing a single column and
0 commit comments