diff --git a/0.2.10/404.html b/0.2.10/404.html deleted file mode 100644 index b3541e8..0000000 --- a/0.2.10/404.html +++ /dev/null @@ -1,447 +0,0 @@ - - - -
- - - - - - - - - - - - - - -nearest(df1, df2, overlap_filter=FilterOp.Strict, suffixes=('_1', '_2'), on_cols=None, col1=None, col2=None, output_type='polars.LazyFrame')
-
-Find pairs of nearest genomic intervals. -Bioframe inspired API.
- - -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
- df1
- |
-
- Union[str, DataFrame, LazyFrame, DataFrame]
- |
-
-
-
- Can be a path to a file, a polars DataFrame, or a pandas DataFrame. CSV with a header and Parquet are supported. - |
- - required - | -
- df2
- |
-
- Union[str, DataFrame, LazyFrame, DataFrame]
- |
-
-
-
- Can be a path to a file, a polars DataFrame, or a pandas DataFrame. CSV with a header and Parquet are supported. - |
- - required - | -
- overlap_filter
- |
-
- FilterOp
- |
-
-
-
- FilterOp, optional. The type of overlap to consider (Weak or Strict). Default is FilterOp.Strict. - |
-
- Strict
- |
-
- col1
- |
-
- Union[list[str], None]
- |
-
-
-
- The names of columns containing the chromosome, start and end of the -genomic intervals, provided separately for each set. The default -values are 'contig', 'pos_start', 'pos_end'. - |
-
- None
- |
-
- col2
- |
-
- Union[list[str], None]
- |
-
-
-
- The names of columns containing the chromosome, start and end of the -genomic intervals, provided separately for each set. The default -values are 'contig', 'pos_start', 'pos_end'. - |
-
- None
- |
-
- suffixes
- |
-
- (str, str)
- |
-
-
-
- Suffixes for the columns of the two overlapped sets. - |
-
- ('_1', '_2')
- |
-
- on_cols
- |
-
- Union[list[str], None]
- |
-
-
-
- List of additional column names to join on. default is None. - |
-
- None
- |
-
- output_type
- |
-
- str
- |
-
-
-
- Type of the output. Default is "polars.LazyFrame"; "polars.DataFrame" and "pandas.DataFrame" are also supported. - |
-
- 'polars.LazyFrame'
- |
-
Returns:
-Type | -Description | -
---|---|
- Union[LazyFrame, DataFrame, DataFrame]
- |
-
-
-
- polars.LazyFrame or polars.DataFrame or pandas.DataFrame of the overlapping intervals. - |
-
The default output format, i.e. LazyFrame, is recommended for large datasets as it supports output streaming and lazy evaluation. -This enables efficient processing of large datasets without loading the entire output dataset into memory.
-Example:
- - -Support for col1, col2, and on_cols and suffixes parameters.
-polars_bio/range_op.py
overlap(df1, df2, how='inner', overlap_filter=FilterOp.Strict, suffixes=('_1', '_2'), on_cols=None, col1=None, col2=None, output_type='polars.LazyFrame')
-
-Find pairs of overlapping genomic intervals. -Bioframe inspired API.
- - -Parameters:
-Name | -Type | -Description | -Default | -
---|---|---|---|
- df1
- |
-
- Union[str, DataFrame, LazyFrame, DataFrame]
- |
-
-
-
- Can be a path to a file, a polars DataFrame, or a pandas DataFrame. CSV with a header and Parquet are supported. - |
- - required - | -
- df2
- |
-
- Union[str, DataFrame, LazyFrame, DataFrame]
- |
-
-
-
- Can be a path to a file, a polars DataFrame, or a pandas DataFrame. CSV with a header and Parquet are supported. - |
- - required - | -
- how
- |
-
- str
- |
-
-
-
- How to handle the overlaps on the two dataframes. inner: use intersection of the set of intervals from df1 and df2, optional. - |
-
- 'inner'
- |
-
- overlap_filter
- |
-
- FilterOp
- |
-
-
-
- FilterOp, optional. The type of overlap to consider (Weak or Strict). Default is FilterOp.Strict. - |
-
- Strict
- |
-
- col1
- |
-
- Union[list[str], None]
- |
-
-
-
- The names of columns containing the chromosome, start and end of the -genomic intervals, provided separately for each set. The default -values are 'contig', 'pos_start', 'pos_end'. - |
-
- None
- |
-
- col2
- |
-
- Union[list[str], None]
- |
-
-
-
- The names of columns containing the chromosome, start and end of the -genomic intervals, provided separately for each set. The default -values are 'contig', 'pos_start', 'pos_end'. - |
-
- None
- |
-
- suffixes
- |
-
- tuple[str]
- |
-
-
-
- Suffixes for the columns of the two overlapped sets. - |
-
- ('_1', '_2')
- |
-
- on_cols
- |
- - | -
-
-
- List of additional column names to join on. default is None. - |
-
- None
- |
-
- output_type
- |
-
- str
- |
-
-
-
- Type of the output. Default is "polars.LazyFrame"; "polars.DataFrame" and "pandas.DataFrame" are also supported. - |
-
- 'polars.LazyFrame'
- |
-
Returns:
-Type | -Description | -
---|---|
- Union[LazyFrame, DataFrame, DataFrame]
- |
-
-
-
- polars.LazyFrame or polars.DataFrame or pandas.DataFrame of the overlapping intervals. - |
-
The default output format, i.e. LazyFrame, is recommended for large datasets as it supports output streaming and lazy evaluation. -This enables efficient processing of large datasets without loading the entire output dataset into memory.
-import polars_bio as pb
-import pandas as pd
-
-df1 = pd.DataFrame([
- ['chr1', 1, 5],
- ['chr1', 3, 8],
- ['chr1', 8, 10],
- ['chr1', 12, 14]],
-columns=['contig', 'pos_start', 'pos_end']
-)
-
-df2 = pd.DataFrame(
-[['chr1', 4, 8],
- ['chr1', 10, 11]],
-columns=['contig', 'pos_start', 'pos_end' ]
-)
-overlapping_intervals = pb.overlap(df1, df2, output_type="pandas.DataFrame")
-
-overlapping_intervals
- contig_1 pos_start_1 pos_end_1 contig_2 pos_start_2 pos_end_2
-0 chr1 1 5 chr1 4 8
-1 chr1 3 8 chr1 4 8
-
Support for col1, col2, and on_cols and suffixes parameters.
-polars_bio/range_op.py
18 -19 -20 -21 -22 -23 -24 -25 -26 -27 -28 -29 -30 -31 -32 -33 -34 -35 -36 -37 -38 -39 -40 -41 -42 -43 -44 -45 -46 -47 -48 -49 -50 -51 -52 -53 -54 -55 -56 -57 -58 -59 -60 -61 -62 -63 -64 -65 -66 -67 -68 -69 -70 -71 -72 -73 -74 -75 -76 -77 -78 -79 -80 -81 -82 -83 -84 -85 -86 -87 -88 -89 -90 -91 -92 -93 |
|