Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue585 and issue 614 #35

Merged
merged 3 commits into from
Apr 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions docs/gff3_fix.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,19 @@ Python 2.7
1. Corrected GFF3

## Quick start
`python2.7 bin/gff3_fix.py -qc_r error.txt -g example.gff3 -og corrected.gff3`
`python2.7 bin/gff3_fix.py -qc_r error.txt -g example_file/example.gff3 -og corrected.gff3`

## Optional arguments

1. -h, --help
- show this help message and exit
2. -og OUTPUT_GFF, --output_gff OUTPUT_GFF
- output gff3 file name
3. -v, --version
2. -qc_r QC_REPORT, --qc_report QC_REPORT
- Error report from gff3_QC.py
3. -g GFF, --gff GFF
- Genome annotation file, gff3 format
4. -og OUTPUT_GFF, --output_gff OUTPUT_GFF
- output gff3 file name (default: corrected.gff3)
5. -v, --version
- show program's version number and exit

## More information
Expand Down
26 changes: 19 additions & 7 deletions docs/gff3_merge.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Usage

gff3_merge.py [-h] [-g1 GFF_FILE1] [-g2 GFF_FILE2] [-f FASTA] [-og OUTPUT_GFF] [-r REPORT_FILE] [-noAuto] [-v]
gff3_merge.py [-h] [-g1 GFF_FILE1] [-g2 GFF_FILE2] [-f FASTA] [-u1 USER_DEFINED_FILE1] [-u2 USER_DEFINED_FILE2] [-og OUTPUT_GFF] [-r REPORT_FILE] [-a] [-noAuto] [-v]

## Testing environment

Expand All @@ -20,22 +20,34 @@ gff3_merge.py [-h] [-g1 GFF_FILE1] [-g2 GFF_FILE2] [-f FASTA] [-og OUTPUT_GFF] [

## Quick start
* Merge the two files with auto-assignment of replace tags (default)
`python2.7 GFF3toolkit/bin/gff3_merge.py -g1 GFF3toolkit/example_file/new_models.gff3 -g2 GFF3toolkit/example_file/reference.gff3 -f GFF3toolkit/example_file/reference.fa -og merged.gff -r merged_report.txt`
`python2.7 bin/gff3_merge.py -g1 example_file/new_models.gff3 -g2 example_file/reference.gff3 -f example_file/reference.fa -og merged.gff -r merged_report.txt`

* If your GFF3 files have proper replace tags at column 9 (Format: replace=[Transcript ID]), you can merge the two GFF3 files without auto-assignment of replace tags.
`python2.7 GFF3toolkit/bin/gff3_merge.py -g1 GFF3toolkit/example_file/new_models_w_replace.gff3 -g2 GFF3toolkit/example_file/reference.gff3 -f GFF3toolkit/example_file/reference.fa -og merged.gff -r merged_report.txt -noAuto`
`python2.7 bin/gff3_merge.py -g1 example_file/new_models_w_replace.gff3 -g2 example_file/reference.gff3 -f example_file/reference.fa -og merged.gff -r merged_report.txt -noAuto`

## Optional arguments

1. -h, --help
- show this help message and exit
2. -og OUTPUT_GFF, --output_gff OUTPUT_GFF
2. -g1 GFF_FILE1, --gff_file1 GFF_FILE1
- Updated GFF3 file, such as Apollo gff
3. -g2 GFF_FILE2, --gff_file2 GFF_FILE2
- Reference GFF3 file, such as Maker gff or OGS gff
4. -f FASTA, --fasta FASTA
- Genomic sequences in the fasta format
5. -u1 USER_DEFINED_FILE1, --user_defined_file1 USER_DEFINED_FILE1
- File for specifying parent and child features for fasta extraction from updated GFF3 file.
6. -u2 USER_DEFINED_FILE2, --user_defined_file2 USER_DEFINED_FILE2
- File for specifying parent and child features for fasta extraction from reference GFF3 file.
7. -og OUTPUT_GFF, --output_gff OUTPUT_GFF
- The merged GFF3 file (default: merged.gff)
3. -r REPORT_FILE, --report_file REPORT_FILE
8. -r REPORT_FILE, --report_file REPORT_FILE
- Log file for the integration (default: merge_report.txt)
4. -noAuto, --auto_assignment
9. -a, --all
- Automatically assign replace tags to all transcript features. (default: Only automatically assign replace tags to transcripts without replace tags)
10. -noAuto, --auto_assignment
- Turn off the auto-assignment of replace tags, if you already have replace tags in your updated gff (default: Automatically assign replace tags and then merge the gff files)
5. -v, --version
11. -v, --version
- show program's version number and exit

## More information
Expand Down
17 changes: 15 additions & 2 deletions gff3tool/bin/gff3_fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,28 @@ def script_main():


Quick start:
python2.7 bin/gff3_fix.py -qc_r error.txt -g example.gff3 -og corrected.gff3
python2.7 bin/gff3_fix.py -qc_r error.txt -g example_file/example.gff3 -og corrected.gff3
"""))

parser.add_argument('-qc_r', '--qc_report', type=str, help='Error report from gff3_QC.py')
parser.add_argument('-g', '--gff', type=str, help='Genome annotation file, gff3 format')
#parser.add_argument('-r', '--report', type=str, help='output report file name')
parser.add_argument('-og', '--output_gff', type=str, help='output gff3 file name')
parser.add_argument('-og', '--output_gff', type=str, help='output gff3 file name', default='corrected.gff3')
parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)

args = parser.parse_args()
if args.qc_report:
logger_stderr.info('Checking QC report file (%s)...', args.qc_report)
else: # no input
parser.print_help()
sys.exit()

if args.gff:
logger_stderr.info('Checking GFF3 file (%s)...', args.gff)
else: # no input
parser.print_help()
sys.exit()

logger_stderr.info('Reading QC report file: (%s)...\n', args.qc_report)
#error_dict example: {'Emr0001': [[15,16],[13]],'Esf0005': [[17]]}
error_dict = {}
Expand Down
13 changes: 8 additions & 5 deletions gff3tool/bin/gff3_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def check_replace(gff, user_defined1=None):
return False


def main(gff_file1, gff_file2, fasta, report, output_gff, auto=True, user_defined1=None, user_defined2=None, logger=None):
def main(gff_file1, gff_file2, fasta, report, output_gff, all_assign=False, auto=True, user_defined1=None, user_defined2=None, logger=None):
logger_null = logging.getLogger(__name__+'null')
null_handler = logging.NullHandler()
logger_null.addHandler(null_handler)
Expand All @@ -66,8 +66,8 @@ def main(gff_file1, gff_file2, fasta, report, output_gff, auto=True, user_define
autoReviseReport = '{0:s}/replace_tag_report.txt'.format(autoDIR)

logger.info('========== Auto-assignment of replace tags for each transcript model ==========')
gff3_merge.auto_replace_tag.main(gff1=gff_file1, gff2=gff_file2, fasta=fasta, outdir=autoDIR, scode='TEMP', user_defined1=user_defined1, user_defined2=user_defined2, logger=logger)
gff3_merge.revision.main(gff_file1, autoFILE, autoReviseGff, autoReviseReport, user_defined1, auto, logger)
gff3_merge.auto_replace_tag.main(gff1=gff_file1, gff2=gff_file2, fasta=fasta, outdir=autoDIR, scode='TEMP', all_assign=all_assign, user_defined1=user_defined1, user_defined2=user_defined2, logger=logger)
gff3_merge.revision.main(gff_file=gff_file1, revision_file=autoFILE, output_gff=autoReviseGff, report_file=autoReviseReport, user_defined1=user_defined1, auto=auto, logger=logger)

logger.info('========== Check whether there are missing replace tags ==========')
gff3 = Gff3(gff_file=autoReviseGff, logger=logger_null)
Expand Down Expand Up @@ -141,6 +141,7 @@ def script_main():
parser.add_argument('-u2', '--user_defined_file2', type=str, help='File for specifing parent and child features for fasta extraction from reference GFF3 file.')
parser.add_argument('-og', '--output_gff', type=str, help='The merged GFF3 file')
parser.add_argument('-r', '--report_file', type=str, help='Log file for the integration')
parser.add_argument('-a', '--all', action='store_true', help='auto-assignment replace tags for all transcript features. (default: Only automatically assign replace tags for the transcript without replace tags)')
parser.add_argument('-noAuto', '--auto_assignment', action='store_false', help='Turn off the auto-assignment of replace tags, if you already have replace tags in your updated gff (default: Automatically assign replace tags and then merge the gff files)')
parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)

Expand Down Expand Up @@ -214,7 +215,9 @@ def script_main():
parser.print_help()
sys.exit(1)


if args.all and not args.auto_assignment:
logger_stderr.error('-a and -noAuto specify opposite behaviors, only one of the two arguments can be accepted.')
sys.exit(0)
if args.report_file:
logger_stderr.info('Writing validation report (%s)...\n', args.report_file)
report_fh = open(args.report_file, 'wb')
Expand All @@ -224,4 +227,4 @@ def script_main():
if not args.output_gff:
args.output_gff='merged.gff'

main(args.gff_file1, args.gff_file2, args.fasta, report_fh, args.output_gff, args.auto_assignment, args.user_defined_file1, args.user_defined_file2, logger=logger_stderr)
main(args.gff_file1, args.gff_file2, args.fasta, report_fh, args.output_gff, args.all, args.auto_assignment, args.user_defined_file1, args.user_defined_file2, logger=logger_stderr)
18 changes: 15 additions & 3 deletions gff3tool/lib/gff3_merge/auto_replace_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
__version__ = '0.0.3'


def main(gff1, gff2, fasta, outdir, scode, logger, user_defined1=None, user_defined2=None):
def main(gff1, gff2, fasta, outdir, scode, logger, all_assign=False, user_defined1=None, user_defined2=None):
logger_null = logging.getLogger(__name__+'null')
null_handler = logging.NullHandler()
logger_null.addHandler(null_handler)
Expand All @@ -51,8 +51,12 @@ def main(gff1, gff2, fasta, outdir, scode, logger, user_defined1=None, user_defi
roots =[]
for line in gff3_1.lines:
try:
if line['line_type'] == 'feature' and not line['attributes'].has_key('Parent') and len(line['attributes']) != 0:
roots.append(line)
if line['line_type'] == 'feature':
# remove all the replace attributes
if all_assign and 'replace' in line['attributes']:
del line['attributes']['replace']
if 'Parent' not in line['attributes'] and len(line['attributes']) != 0:
roots.append(line)
except:
pass
for root in roots:
Expand All @@ -75,11 +79,19 @@ def main(gff1, gff2, fasta, outdir, scode, logger, user_defined1=None, user_defi
for lines in user_defined1:
transcripts_type.add(lines[0])
for line in gff3_1.lines:
if line['line_type'] == 'feature':
if all_assign and 'replace' in line['attributes']:
del line['attributes']['replace']
if line['type'] in transcripts_type:
id = str()
if line['attributes'].has_key('ID'):
id = line['attributes']['ID']
transcripts.add(id)
if all_assign:
# modified gff1 without any replace attributes
gff3_1_mod = '{0:s}/{1:s}'.format(tmpdir, 'gff1_mod.gff3')
gff3_1.write(gff3_1_mod)
gff1 = gff3_1_mod

out1_type = '{0:s}/{1:s}'.format(tmpdir, 'gff1_transcript_type.txt')
with open(out1_type, "w") as trans_type:
Expand Down
4 changes: 2 additions & 2 deletions gff3tool/lib/gff3_merge/revision.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

__version__ = '1.0.3'

def main(gff_file, revision_file, output_gff, report_file=None,user_defined1=None, auto=True,logger=None):
def main(gff_file, revision_file, output_gff, report_file=None, user_defined1=None, auto=True, logger=None):
logger_null = logging.getLogger(__name__+'null')
null_handler = logging.NullHandler()
logger_null.addHandler(null_handler)
Expand Down Expand Up @@ -331,4 +331,4 @@ def main(gff_file, revision_file, output_gff, report_file=None,user_defined1=Non
if not args.output_gff:
args.output_gff = 'Revised_{0:s}'.format(args.gff_file)

main(args.gff_file, args.revision_file, args.output_gff, args.report_file, logger=logger_stderr)
main(gff_file=args.gff_file, revision_file=args.revision_file, output_gff=args.output_gff, report_file=args.report_file, logger=logger_stderr)