sep241util module#

exception sep241util.BEDFormatError#

Bases: Exception

exception sep241util.BarcodeError#

Bases: Exception

class sep241util.DataManager(fragments_files={}, gtf_file=None, show_progress=True, comment='#', remove_pcr_duplicates=True)#

Bases: object

A utility class for sep241 deconvolution.

all_fragments(barcode=None, replace_barcode=None)#
count(bins, index_name=None, normalize=False, count_prior=0)#
envents_from_intervals(interval_df)#
file_length = {}#
get_fragments(feature, max_dist=0, include_events=True, nuc_size=120, _return_bounds=False, with_name=False)#
get_intervals(region, nuc_size=120, with_name=False)#
property gtf#
property gtf_file#
hist_from_df(len_df, bin_width=1, normalize=True, color='from', count_prior=0, bins=None)#
length_histograms(bin_width=1, normalize=True, color='from', count_prior=0, bins=None)#
plot_frag_length(log_scale=True)#
plot_genes(genes, x='pseudotime', points=True, regression=True, std=True)#
plot_igv(feature, max_dist=0, nuc_size=120, group=None, color='from', group_nucs=True, sort_by=None, obs_from=None, y_scale='free', draw_peaks=False, min_counts=0, min_density=1e-10, smooth_events_bw=None, mark_nucs=[], anno_plot=True, color_anno_by='gene', full_label=False, igv_border=True, **kwargs)#

A plot similar to the IGV visualization of ATAC reads.

Parameters:
  • feature – A gene, genomic interval or df of ATAC reads.

  • max_dist – Maximal distance around the selected feature to show.

  • nuc_size – Length of ATAC reads at which to classify as nucleosome reads.

  • group – Used in ggplot for faceting. Must be a subset of color.

  • color – Used in ggplot for coloring of tracks. Must be a superset of group.

  • group_nucs – Put nucleosome reads in a subgroup.

  • sort_by – Sort groups and color by this column.

  • obs_from – Select AnnData object. Must be ATAC or RNA.

  • y_scale – Must be fixed or free.

  • draw_peaks – Mark peaks selected in ATAC AnnData object.

  • min_counts – Tracks with fewer counts than this will not be plotted.

  • smooth_events_bw – If set to a number, draw a KDE of the events with that bandwidth instead of the IGV signal.

  • mark_nucs – List of nucleosome positions to mark.

  • anno_plot – Call plot_gtf to show genes and genomic features.

  • color_anno_by – E.g. features or genes.

  • full_label – Set to True to write full label into facet headers.

  • **kwargs – Args passed to plot_gtf.

plot_length_hist(gdf=None, bin_width=1, normalize=True, color='from', count_prior=0, bins=None)#
class sep241util.LevelSet(interval_padding, base_level=0, increments=-2)#

Bases: object

Used by the gtf drawing tool.

Remembers levels occupied by intervals if called for all intervals sorted by their start. Ensures a minimal padding distance between intervals in the same level.

get_free(interval_start, interval_end)#
reset()#
exception sep241util.MissingData#

Bases: Exception

exception sep241util.NoData#

Bases: Exception

exception sep241util.NotARegion#

Bases: Exception

sep241util.check_length_distribution_flip(workdata, map_results, threshold=0.9)#

Look for flip between posterior length distributions.

Parameters:

threshold – Pearson correlation threshold for difference between c1 and c2 (default=.9).

sep241util.detect_cores(cores=None)#
sep241util.format_cuts(locations_ds)#

Formats cuts as represented in the model.

sep241util.igv_plot(df, color=None, group=None, facet_scales='free_y', min_counts=0, min_density=1e-10, sort_by=None, relabeling={}, smooth_events=None, max_res=1000, border=True)#

Draws an IGV-like plot. Used by Multitool class.

Parameters:
  • df – DataFrame with reads and columns ‘start’, ‘stop’ and columns specified by group and color.

  • color – Column of df to color tracks by. Must be coarser than groups.

  • group – Column of df to draw different tracks per group. Must be finer than color.

  • facet_scales – passed to ggplot facet_grid to scale axis of tracks.

  • min_counts – Filter out tracks with fewer reads (default: 0).

  • sort_by – Column of df. Sort tracks by the median within group. (default: ‘rna_pseudotime’)

  • relabeling – Dict to rename track names.

  • smooth_events – Plot kde of event rates instead of IGV if not None. Use value as bw_method in scipy.stats.gaussian_kde.

  • max_res – Maximum grid resolution when plotting smooth_events.

sep241util.length_dist(weights, sigmas=[0.4, 0.18, 0.115, 0.085], modes=[70, 200, 400, 600], x=array([1., 2.6012024, 4.20240481, 5.80360721, 7.40480962, 9.00601202, 10.60721443, 12.20841683, 13.80961924, 15.41082164, 17.01202405, 18.61322645, 20.21442886, 21.81563126, 23.41683367, 25.01803607, 26.61923848, 28.22044088, 29.82164329, 31.42284569, 33.0240481, 34.6252505, 36.22645291, 37.82765531, 39.42885772, 41.03006012, 42.63126253, 44.23246493, 45.83366733, 47.43486974, 49.03607214, 50.63727455, 52.23847695, 53.83967936, 55.44088176, 57.04208417, 58.64328657, 60.24448898, 61.84569138, 63.44689379, 65.04809619, 66.6492986, 68.250501, 69.85170341, 71.45290581, 73.05410822, 74.65531062, 76.25651303, 77.85771543, 79.45891784, 81.06012024, 82.66132265, 84.26252505, 85.86372745, 87.46492986, 89.06613226, 90.66733467, 92.26853707, 93.86973948, 95.47094188, 97.07214429, 98.67334669, 100.2745491, 101.8757515, 103.47695391, 105.07815631, 106.67935872, 108.28056112, 109.88176353, 111.48296593, 113.08416834, 114.68537074, 116.28657315, 117.88777555, 119.48897796, 121.09018036, 122.69138277, 124.29258517, 125.89378758, 127.49498998, 129.09619238, 130.69739479, 132.29859719, 133.8997996, 135.501002, 137.10220441, 138.70340681, 140.30460922, 141.90581162, 143.50701403, 145.10821643, 146.70941884, 148.31062124, 149.91182365, 151.51302605, 153.11422846, 154.71543086, 156.31663327, 157.91783567, 159.51903808, 161.12024048, 162.72144289, 164.32264529, 165.9238477, 167.5250501, 169.12625251, 170.72745491, 172.32865731, 173.92985972, 175.53106212, 177.13226453, 178.73346693, 180.33466934, 181.93587174, 183.53707415, 185.13827655, 186.73947896, 188.34068136, 189.94188377, 191.54308617, 193.14428858, 194.74549098, 196.34669339, 197.94789579, 199.5490982, 201.1503006, 202.75150301, 204.35270541, 205.95390782, 207.55511022, 209.15631263, 210.75751503, 212.35871743, 213.95991984, 215.56112224, 217.16232465, 218.76352705, 220.36472946, 221.96593186, 223.56713427, 225.16833667, 226.76953908, 
228.37074148, 229.97194389, 231.57314629, 233.1743487, 234.7755511, 236.37675351, 237.97795591, 239.57915832, 241.18036072, 242.78156313, 244.38276553, 245.98396794, 247.58517034, 249.18637275, 250.78757515, 252.38877756, 253.98997996, 255.59118236, 257.19238477, 258.79358717, 260.39478958, 261.99599198, 263.59719439, 265.19839679, 266.7995992, 268.4008016, 270.00200401, 271.60320641, 273.20440882, 274.80561122, 276.40681363, 278.00801603, 279.60921844, 281.21042084, 282.81162325, 284.41282565, 286.01402806, 287.61523046, 289.21643287, 290.81763527, 292.41883768, 294.02004008, 295.62124248, 297.22244489, 298.82364729, 300.4248497, 302.0260521, 303.62725451, 305.22845691, 306.82965932, 308.43086172, 310.03206413, 311.63326653, 313.23446894, 314.83567134, 316.43687375, 318.03807615, 319.63927856, 321.24048096, 322.84168337, 324.44288577, 326.04408818, 327.64529058, 329.24649299, 330.84769539, 332.4488978, 334.0501002, 335.65130261, 337.25250501, 338.85370741, 340.45490982, 342.05611222, 343.65731463, 345.25851703, 346.85971944, 348.46092184, 350.06212425, 351.66332665, 353.26452906, 354.86573146, 356.46693387, 358.06813627, 359.66933868, 361.27054108, 362.87174349, 364.47294589, 366.0741483, 367.6753507, 369.27655311, 370.87775551, 372.47895792, 374.08016032, 375.68136273, 377.28256513, 378.88376754, 380.48496994, 382.08617234, 383.68737475, 385.28857715, 386.88977956, 388.49098196, 390.09218437, 391.69338677, 393.29458918, 394.89579158, 396.49699399, 398.09819639, 399.6993988, 401.3006012, 402.90180361, 404.50300601, 406.10420842, 407.70541082, 409.30661323, 410.90781563, 412.50901804, 414.11022044, 415.71142285, 417.31262525, 418.91382766, 420.51503006, 422.11623246, 423.71743487, 425.31863727, 426.91983968, 428.52104208, 430.12224449, 431.72344689, 433.3246493, 434.9258517, 436.52705411, 438.12825651, 439.72945892, 441.33066132, 442.93186373, 444.53306613, 446.13426854, 447.73547094, 449.33667335, 450.93787575, 452.53907816, 454.14028056, 455.74148297, 
457.34268537, 458.94388778, 460.54509018, 462.14629259, 463.74749499, 465.34869739, 466.9498998, 468.5511022, 470.15230461, 471.75350701, 473.35470942, 474.95591182, 476.55711423, 478.15831663, 479.75951904, 481.36072144, 482.96192385, 484.56312625, 486.16432866, 487.76553106, 489.36673347, 490.96793587, 492.56913828, 494.17034068, 495.77154309, 497.37274549, 498.9739479, 500.5751503, 502.17635271, 503.77755511, 505.37875752, 506.97995992, 508.58116232, 510.18236473, 511.78356713, 513.38476954, 514.98597194, 516.58717435, 518.18837675, 519.78957916, 521.39078156, 522.99198397, 524.59318637, 526.19438878, 527.79559118, 529.39679359, 530.99799599, 532.5991984, 534.2004008, 535.80160321, 537.40280561, 539.00400802, 540.60521042, 542.20641283, 543.80761523, 545.40881764, 547.01002004, 548.61122244, 550.21242485, 551.81362725, 553.41482966, 555.01603206, 556.61723447, 558.21843687, 559.81963928, 561.42084168, 563.02204409, 564.62324649, 566.2244489, 567.8256513, 569.42685371, 571.02805611, 572.62925852, 574.23046092, 575.83166333, 577.43286573, 579.03406814, 580.63527054, 582.23647295, 583.83767535, 585.43887776, 587.04008016, 588.64128257, 590.24248497, 591.84368737, 593.44488978, 595.04609218, 596.64729459, 598.24849699, 599.8496994, 601.4509018, 603.05210421, 604.65330661, 606.25450902, 607.85571142, 609.45691383, 611.05811623, 612.65931864, 614.26052104, 615.86172345, 617.46292585, 619.06412826, 620.66533066, 622.26653307, 623.86773547, 625.46893788, 627.07014028, 628.67134269, 630.27254509, 631.87374749, 633.4749499, 635.0761523, 636.67735471, 638.27855711, 639.87975952, 641.48096192, 643.08216433, 644.68336673, 646.28456914, 647.88577154, 649.48697395, 651.08817635, 652.68937876, 654.29058116, 655.89178357, 657.49298597, 659.09418838, 660.69539078, 662.29659319, 663.89779559, 665.498998, 667.1002004, 668.70140281, 670.30260521, 671.90380762, 673.50501002, 675.10621242, 676.70741483, 678.30861723, 679.90981964, 681.51102204, 683.11222445, 684.71342685, 
686.31462926, 687.91583166, 689.51703407, 691.11823647, 692.71943888, 694.32064128, 695.92184369, 697.52304609, 699.1242485, 700.7254509, 702.32665331, 703.92785571, 705.52905812, 707.13026052, 708.73146293, 710.33266533, 711.93386774, 713.53507014, 715.13627255, 716.73747495, 718.33867735, 719.93987976, 721.54108216, 723.14228457, 724.74348697, 726.34468938, 727.94589178, 729.54709419, 731.14829659, 732.749499, 734.3507014, 735.95190381, 737.55310621, 739.15430862, 740.75551102, 742.35671343, 743.95791583, 745.55911824, 747.16032064, 748.76152305, 750.36272545, 751.96392786, 753.56513026, 755.16633267, 756.76753507, 758.36873747, 759.96993988, 761.57114228, 763.17234469, 764.77354709, 766.3747495, 767.9759519, 769.57715431, 771.17835671, 772.77955912, 774.38076152, 775.98196393, 777.58316633, 779.18436874, 780.78557114, 782.38677355, 783.98797595, 785.58917836, 787.19038076, 788.79158317, 790.39278557, 791.99398798, 793.59519038, 795.19639279, 796.79759519, 798.3987976, 800.]))#
sep241util.plot_gtf(gtf_dataframe, draw_genes=True, draw_exons=True, draw_transcripts=True, draw_codons=True, draw_utr=True, draw_labels=True, padding=200)#

Draw features of a gtf DataFrame. Used by Multitool.

Parameters:
  • gtf_dataframe – A pandas data frame from a gtf parsed by gtfparse.

  • draw_genes – Bool, if genes should be included.

  • draw_exons – Bool, if exons should be drawn.

  • draw_transcripts – Bool, if transcripts should be drawn (in a separate track).

  • draw_codons – Bool, if start and stop codon should be marked.

  • draw_utr – Bool, if UTR should be drawn.

  • draw_labels – Bool, if labels with gene names should be drawn on genes.

  • padding – Minimal horizontal distance in bp between drawn genes/transcripts. If features are too close, they are shifted vertically.

sep241util.posterior_length_dists(map_results, x=array([1., 2.6012024, 4.20240481, 5.80360721, 7.40480962, 9.00601202, 10.60721443, 12.20841683, 13.80961924, 15.41082164, 17.01202405, 18.61322645, 20.21442886, 21.81563126, 23.41683367, 25.01803607, 26.61923848, 28.22044088, 29.82164329, 31.42284569, 33.0240481, 34.6252505, 36.22645291, 37.82765531, 39.42885772, 41.03006012, 42.63126253, 44.23246493, 45.83366733, 47.43486974, 49.03607214, 50.63727455, 52.23847695, 53.83967936, 55.44088176, 57.04208417, 58.64328657, 60.24448898, 61.84569138, 63.44689379, 65.04809619, 66.6492986, 68.250501, 69.85170341, 71.45290581, 73.05410822, 74.65531062, 76.25651303, 77.85771543, 79.45891784, 81.06012024, 82.66132265, 84.26252505, 85.86372745, 87.46492986, 89.06613226, 90.66733467, 92.26853707, 93.86973948, 95.47094188, 97.07214429, 98.67334669, 100.2745491, 101.8757515, 103.47695391, 105.07815631, 106.67935872, 108.28056112, 109.88176353, 111.48296593, 113.08416834, 114.68537074, 116.28657315, 117.88777555, 119.48897796, 121.09018036, 122.69138277, 124.29258517, 125.89378758, 127.49498998, 129.09619238, 130.69739479, 132.29859719, 133.8997996, 135.501002, 137.10220441, 138.70340681, 140.30460922, 141.90581162, 143.50701403, 145.10821643, 146.70941884, 148.31062124, 149.91182365, 151.51302605, 153.11422846, 154.71543086, 156.31663327, 157.91783567, 159.51903808, 161.12024048, 162.72144289, 164.32264529, 165.9238477, 167.5250501, 169.12625251, 170.72745491, 172.32865731, 173.92985972, 175.53106212, 177.13226453, 178.73346693, 180.33466934, 181.93587174, 183.53707415, 185.13827655, 186.73947896, 188.34068136, 189.94188377, 191.54308617, 193.14428858, 194.74549098, 196.34669339, 197.94789579, 199.5490982, 201.1503006, 202.75150301, 204.35270541, 205.95390782, 207.55511022, 209.15631263, 210.75751503, 212.35871743, 213.95991984, 215.56112224, 217.16232465, 218.76352705, 220.36472946, 221.96593186, 223.56713427, 225.16833667, 226.76953908, 228.37074148, 229.97194389, 231.57314629, 
233.1743487, 234.7755511, 236.37675351, 237.97795591, 239.57915832, 241.18036072, 242.78156313, 244.38276553, 245.98396794, 247.58517034, 249.18637275, 250.78757515, 252.38877756, 253.98997996, 255.59118236, 257.19238477, 258.79358717, 260.39478958, 261.99599198, 263.59719439, 265.19839679, 266.7995992, 268.4008016, 270.00200401, 271.60320641, 273.20440882, 274.80561122, 276.40681363, 278.00801603, 279.60921844, 281.21042084, 282.81162325, 284.41282565, 286.01402806, 287.61523046, 289.21643287, 290.81763527, 292.41883768, 294.02004008, 295.62124248, 297.22244489, 298.82364729, 300.4248497, 302.0260521, 303.62725451, 305.22845691, 306.82965932, 308.43086172, 310.03206413, 311.63326653, 313.23446894, 314.83567134, 316.43687375, 318.03807615, 319.63927856, 321.24048096, 322.84168337, 324.44288577, 326.04408818, 327.64529058, 329.24649299, 330.84769539, 332.4488978, 334.0501002, 335.65130261, 337.25250501, 338.85370741, 340.45490982, 342.05611222, 343.65731463, 345.25851703, 346.85971944, 348.46092184, 350.06212425, 351.66332665, 353.26452906, 354.86573146, 356.46693387, 358.06813627, 359.66933868, 361.27054108, 362.87174349, 364.47294589, 366.0741483, 367.6753507, 369.27655311, 370.87775551, 372.47895792, 374.08016032, 375.68136273, 377.28256513, 378.88376754, 380.48496994, 382.08617234, 383.68737475, 385.28857715, 386.88977956, 388.49098196, 390.09218437, 391.69338677, 393.29458918, 394.89579158, 396.49699399, 398.09819639, 399.6993988, 401.3006012, 402.90180361, 404.50300601, 406.10420842, 407.70541082, 409.30661323, 410.90781563, 412.50901804, 414.11022044, 415.71142285, 417.31262525, 418.91382766, 420.51503006, 422.11623246, 423.71743487, 425.31863727, 426.91983968, 428.52104208, 430.12224449, 431.72344689, 433.3246493, 434.9258517, 436.52705411, 438.12825651, 439.72945892, 441.33066132, 442.93186373, 444.53306613, 446.13426854, 447.73547094, 449.33667335, 450.93787575, 452.53907816, 454.14028056, 455.74148297, 457.34268537, 458.94388778, 460.54509018, 
462.14629259, 463.74749499, 465.34869739, 466.9498998, 468.5511022, 470.15230461, 471.75350701, 473.35470942, 474.95591182, 476.55711423, 478.15831663, 479.75951904, 481.36072144, 482.96192385, 484.56312625, 486.16432866, 487.76553106, 489.36673347, 490.96793587, 492.56913828, 494.17034068, 495.77154309, 497.37274549, 498.9739479, 500.5751503, 502.17635271, 503.77755511, 505.37875752, 506.97995992, 508.58116232, 510.18236473, 511.78356713, 513.38476954, 514.98597194, 516.58717435, 518.18837675, 519.78957916, 521.39078156, 522.99198397, 524.59318637, 526.19438878, 527.79559118, 529.39679359, 530.99799599, 532.5991984, 534.2004008, 535.80160321, 537.40280561, 539.00400802, 540.60521042, 542.20641283, 543.80761523, 545.40881764, 547.01002004, 548.61122244, 550.21242485, 551.81362725, 553.41482966, 555.01603206, 556.61723447, 558.21843687, 559.81963928, 561.42084168, 563.02204409, 564.62324649, 566.2244489, 567.8256513, 569.42685371, 571.02805611, 572.62925852, 574.23046092, 575.83166333, 577.43286573, 579.03406814, 580.63527054, 582.23647295, 583.83767535, 585.43887776, 587.04008016, 588.64128257, 590.24248497, 591.84368737, 593.44488978, 595.04609218, 596.64729459, 598.24849699, 599.8496994, 601.4509018, 603.05210421, 604.65330661, 606.25450902, 607.85571142, 609.45691383, 611.05811623, 612.65931864, 614.26052104, 615.86172345, 617.46292585, 619.06412826, 620.66533066, 622.26653307, 623.86773547, 625.46893788, 627.07014028, 628.67134269, 630.27254509, 631.87374749, 633.4749499, 635.0761523, 636.67735471, 638.27855711, 639.87975952, 641.48096192, 643.08216433, 644.68336673, 646.28456914, 647.88577154, 649.48697395, 651.08817635, 652.68937876, 654.29058116, 655.89178357, 657.49298597, 659.09418838, 660.69539078, 662.29659319, 663.89779559, 665.498998, 667.1002004, 668.70140281, 670.30260521, 671.90380762, 673.50501002, 675.10621242, 676.70741483, 678.30861723, 679.90981964, 681.51102204, 683.11222445, 684.71342685, 686.31462926, 687.91583166, 689.51703407, 
691.11823647, 692.71943888, 694.32064128, 695.92184369, 697.52304609, 699.1242485, 700.7254509, 702.32665331, 703.92785571, 705.52905812, 707.13026052, 708.73146293, 710.33266533, 711.93386774, 713.53507014, 715.13627255, 716.73747495, 718.33867735, 719.93987976, 721.54108216, 723.14228457, 724.74348697, 726.34468938, 727.94589178, 729.54709419, 731.14829659, 732.749499, 734.3507014, 735.95190381, 737.55310621, 739.15430862, 740.75551102, 742.35671343, 743.95791583, 745.55911824, 747.16032064, 748.76152305, 750.36272545, 751.96392786, 753.56513026, 755.16633267, 756.76753507, 758.36873747, 759.96993988, 761.57114228, 763.17234469, 764.77354709, 766.3747495, 767.9759519, 769.57715431, 771.17835671, 772.77955912, 774.38076152, 775.98196393, 777.58316633, 779.18436874, 780.78557114, 782.38677355, 783.98797595, 785.58917836, 787.19038076, 788.79158317, 790.39278557, 791.99398798, 793.59519038, 795.19639279, 796.79759519, 798.3987976, 800.]), prior=True, progress=False)#
sep241util.posterior_mode_weights(workdata, map_results)#
sep241util.read_job_data(jobdata_file)#
sep241util.read_region_string(feature, short_seqname=True, format_string='([^:]+):([0-9,_]+)-([0-9,_]+)')#
sep241util.read_results(jobdata_file, workdata, progress=True, error=True)#
sep241util.set_flag(flag, value=None)#

Sets or overwrites the theano flag in the environment variable ‘THEANO_FLAGS’.

Parameters:
  • flag – The flag name that is to be overwritten or set.

  • value – The value to be assigned to the flag. If it is None then flag will be pasted as is into ‘THEANO_FLAGS’.

Returns:

The new value of ‘THEANO_FLAGS’.

sep241util.setup_logging(level, logfile=None)#
sep241util.to_grouped_string(list_of_integers)#
sep241util.write_bed(bed_df, out_path)#