############################################################################
# Copyright (c) 2005-2017 ZJU
# All Rights Reserved.
# See file LICENSE for details.
############################################################################

=begin SUMMARY
Main steps:
1. Check the parameters
2. Raw dataset alignment
	key routine: BWA
3. Get candidate circRNAs
	key routine: CIRI2
4. Second alignment
	getting full length of candidate circRNA
	key routine: BWA
5. circRNA-miRNA interaction
	key routine: TargetFinder
6. Output
	annotation and isoforms
=cut

use strict;
use warnings;
use threads;
use Getopt::Long;
use File::Basename;

# Flush STDOUT after every write so progress messages appear immediately.
$| = 1;
my $version = '1.0';

# Option values and the read-file names derived from them; shared by the whole script.
my ( $rawdata, @rawdata, $read, $read1, $read2, $cRNA_out, $ref_dir, $anno_gtf, $miRNA, $help, $max_circle, $low_strigency, $min_mapq, $thread, $chrM, $chrP, $log);
# GetOptions returns false for an unknown option or a malformed value (it has
# already printed the details to STDERR); stop here instead of running the
# pipeline with half-parsed settings.
Getopt::Long::GetOptions (
	'in|I=s'					=>	\$rawdata,
	'out|O=s'					=>	\$cRNA_out,
	'ref|R=s'					=>	\$ref_dir,
	'anno|A=s'					=>	\$anno_gtf,
	'miRNA|MI=s'					=>	\$miRNA,
	'help|H!'					=>	\$help,
	'max_span|S=i'				=>	\$max_circle,
	'low_strigency|L=i'		=>	\$low_strigency,
	'mapq|U=i'					=> \$min_mapq,
	'chrM|M=s'					=>	\$chrM,
	'chrP|P=s'					=>	\$chrP,
	'thread|T=i'				=>	\$thread,
) or die "Invalid command line. Please use the --help or -H option to get usage information.\n";

### 1. Check the parameters
### Show help information if requested
if (defined($help)) {
	# Usage text. The long option names below are the ones registered with
	# GetOptions ("--ref", "--thread"), so every documented spelling is accepted.
	print '
Program:  CircPlant
Version:  1.0

Usage:    perl CircPlant.pl -I read1,read2 -O output -R ref.fa

Arguments:
    -I, --in
          input file(s) name, FASTA/FASTQ file from total/non-poly(A) RNA-Seq (required);
          separate paired-end files with a comma
    -O, --out
          output directory (required)
    -R, --ref
          FASTA file of reference genome (required)
    -A, --anno
          input GTF/GFF3 formatted annotation file name (optional)
    -MI, --miRNA
          FASTA file of mature miRNAs (optional)
    -H, --help
          show this help information
    -S, --max_span
          max spanning distance of circRNAs (default: 200000)
    -L, --low_strigency
          output circRNAs supported by more than * junction reads
    -U, --mapq
          set threshold for mapping quality of each segment of junction reads (default: 10; should be within [0,30])
    -M, --chrM
          tell CircPlant the ID of mitochondrion in reference file (default: chrM)
    -P, --chrP
          tell CircPlant the ID of chloroplast in reference file (default: chrP)
    -T, --thread
          number of threads for parallel running (default: 1)
';
} elsif ( !defined($rawdata) and !defined($cRNA_out) and !defined($ref_dir) and !defined($help) and !defined($anno_gtf)) {
	print "Please use the --help or -H option to get usage information.\n";
} else {
	### perl directory
	my $dir = dirname($0);
	
	my ($output_dir, $input_dir);
	my (@die_reason, @warn_reason);

	
	### A preliminary check on whether an appropriate raw dataset is provided
	# $scalar records how many read files were given (1 = single-end, 2 = paired-end);
	# both alignment steps branch on it later.
	my $scalar;
	if (!defined($rawdata)) {
		push @die_reason, "Please use --in or -I option to designate input dataset!\n";
	} else{
		my @rawdata = split(/,/,$rawdata);
		$scalar = @rawdata;
		if ($scalar > 2){
			push @die_reason, "More than two input files found at designated directory!\n";
		} elsif ($scalar == 0 ){
			push @die_reason, "No input file found at designated directory!\n";
		} else {
			# Test the split entries, not the raw option string, so that a trailing
			# comma ("reads.fq,") does not make an existing file look missing.
			my @missing = grep { !-e $_ or !-f $_ } @rawdata;
			foreach my $absent (@missing) {
				push @die_reason, "No input file $absent found at designated directory!\n";
			}
			# Read names are only assigned when every listed file is usable.
			if (!@missing) {
				if ($scalar == 1) {
					$read = $rawdata[0];
				} else {
					($read1, $read2) = @rawdata;
				}
			}
		}
	}
	
	
	### A preliminary check on whether an appropriate output directory is designated
	if (!defined($cRNA_out)) {
		push @die_reason, "Please use --out or -O option to designate output file!\n";
	} else {
		# The option names a directory; a "/" is appended so file names can be
		# concatenated directly onto $output_dir.
		$cRNA_out=$cRNA_out."/";
		if (-e $cRNA_out and -f $cRNA_out) {
			push @warn_reason, " Output file $cRNA_out already exists and is overwritten.\n";
		}
		# Because the path now always ends in "/", the directory part is the whole string.
		$output_dir = $cRNA_out;
		# Create the directory with Perl's mkdir rather than a shell command: paths
		# containing spaces or shell metacharacters work, and a failure is reported.
		if (!-e $output_dir and !mkdir($output_dir)) {
			push @die_reason, "Cannot create output directory $output_dir: $!\n";
		} elsif (!-w $output_dir) {
			push @die_reason, "Output file cannot be written in the directory $output_dir!\n";
		}
	}
	
	# Send Perl's own warnings and errors to a log inside the output directory.
	# The file is opened on a separate handle first: if that fails, STDERR is
	# still open and the warning below is actually visible. Nothing is
	# redirected when no output directory was given (a fatal error is already
	# queued for that case).
	if (defined $output_dir) {
		if (open my $errlog_fh, ">>", "$output_dir/CircPlanterror.log") {
			open STDERR, ">&", $errlog_fh or warn "CircPlant cannot write to error log: $!";
		} else {
			warn "CircPlant cannot write to error log: $!";
		}
	}
	
	### Output log
	if (!defined($log)) {
		# Falls back to the current directory when no output directory is known.
		$log = (defined $output_dir ? $output_dir : '').'CircPlant.log';
	}
	
	
	### A preliminary check on whether an appropriate reference provided by user is appropriate
	# %chr_seq maps every FASTA record ID (first word after ">") to its
	# concatenated sequence; it is used later to cut out circRNA sequences.
	my %chr_seq; 
	if (!defined($ref_dir)) {
		push @die_reason, "Please use --ref or -R option to designate one file with all references in!\n";
	} elsif (!-e $ref_dir or !-f $ref_dir) {
		push @die_reason, "Reference file $ref_dir does not exist!\n";
	} elsif (!-r $ref_dir) {
		push @die_reason, "Reference file $ref_dir is not readable!\n";
	} else {
		# MOD is the run log every later step appends to.
		open MOD, ">>", $log or warn "CircPlant cannot write to log $log: $!";
		print MOD 	'[', scalar(localtime), "] CircPlant begins running\n";
		print 		'[', scalar(localtime), "] CircPlant begins running\n" ;
		print MOD 	'[', scalar(localtime), "] Loading reference\n";
		print 		'[', scalar(localtime), "] Loading reference\n" ;

		# The organelle options receive their defaults further down the script,
		# so resolve them locally for the emptiness check below.
		my $mito_id    = defined $chrM ? $chrM : 'chrM';
		my $plastid_id = defined $chrP ? $chrP : 'chrP';

		if (open my $ref_fh, "<", $ref_dir) {
			my $ref_ID;
			while (my $fasta_line = <$ref_fh>) {
				chomp $fasta_line;
				$fasta_line =~ s/\r\z//;	# tolerate CRLF line endings
				if ( $fasta_line =~ /^>(\S+)/ ) {
					$ref_ID = $1;
					$chr_seq{$ref_ID} = '' unless exists $chr_seq{$ref_ID};
				} elsif (defined $ref_ID) {
					# Lines before the first header belong to no record and are skipped.
					$chr_seq{$ref_ID} .= $fasta_line;
				}
			}
			close $ref_fh;

			# A record with a header but no sequence is an error, except for the two
			# organelle IDs, which are allowed to be empty.
			my @chr_not_found = grep {
				length($chr_seq{$_}) == 0 and $_ ne $mito_id and $_ ne $plastid_id
			} sort keys %chr_seq;
			if (@chr_not_found > 0) {
				push @die_reason, "The following chromosomes are not found in $ref_dir: @chr_not_found\n";
			}
		} else {
			push @die_reason, "Cannot open reference file $ref_dir: $!";
		}
	}
	
	
	### A preliminary check on whether an appropriate annotation file is provided
	# $gff / $gtf flag the detected format and stay undefined when no usable
	# annotation was given; the containers below are filled by the parsers.
	my ($gff, $gtf, $if_anno);
	my (@anno_gene, @anno_utr, %exonhash);
	if ( defined($anno_gtf) ) {
		if ( !(-e $anno_gtf and -f $anno_gtf) ) {
			push @die_reason, "No annotation file found at designated directory!\n";
		} elsif ( $anno_gtf !~ /\.gff/ and $anno_gtf !~ /\.gtf/ ) {
			push @die_reason, "Please provide .gff or .gtf format as annotation file!\n";
		} else {
			# ".gff" takes precedence when a name contains both markers.
			if ( $anno_gtf =~ /\.gff/ ) {
				$gff = 1;
			} else {
				$gtf = 1;
			}
			$if_anno = 1;
		}
	}
	### If the annotation is formatted gff, further determine the type of gff of the annotation file provided
	# One pass over the GFF file. Gene and UTR rows are stored as
	# [gene_ID, chr, strand, start, end]; exon rows are appended to
	# $exonhash{gene_ID} as "gene&&chr&&strand&&start&&end&&transcript||".
	if ( defined $gff and $gff == 1 ) {
		my $loading_note = "] Loading annotation file\n";
		print MOD 	'[', scalar(localtime), $loading_note;
		print 		'[', scalar(localtime), $loading_note;

		my $bad_gff = "CIRI cannot understand $anno_gtf! Please refer to manual for details of required GFF formats.\n";
		# UTR and exon rows name their owner either with a leading
		# "Parent=<gene>.<suffix>" attribute or with a ";gene=<gene>" attribute.
		# Returns (gene ID, transcript ID); dies when neither form is present.
		my $gff_owner = sub {
			my ($attributes) = @_;
			return ($2, $1) if $attributes =~ /^Parent=((\w+)\.\w*).*/;
			return ($1, $1) if $attributes =~ /;gene=(\w+)/;
			die $bad_gff;
		};

		open my $gff_fh, "<", $anno_gtf or die "Cannot open the annotation file: $!";
		while (my $record = <$gff_fh>) {
			chomp $record;
			my @line = split /\t/, $record;
			my $feature = $line[2];
			next unless defined $feature;
			my @locus = @line[0,6,3,4];	# chr, strand, start, end

			if ( $feature eq 'gene' ) {
				my ($gene_ID) = $line[8] =~ /ID=(\w+)/;
				($gene_ID) = $line[8] =~ /;gene=(\w+)/ unless defined $gene_ID;
				die $bad_gff unless defined $gene_ID;
				push @anno_gene, [$gene_ID, @locus];
			}
			if ( $feature =~ /prime_utr/ ) {
				my ($gene_ID) = $gff_owner->($line[8]);
				push @anno_utr, [$gene_ID, @locus];
			}
			if ( $feature =~ /exon/ ) {
				my ($gene_ID, $transid) = $gff_owner->($line[8]);
				$exonhash{$gene_ID} .= join("&&", $gene_ID, @locus, $transid)."||";
			}
		}
		close $gff_fh;
	}
	### If the annotation is formatted gtf.
	# Same containers as for GFF input: gene and UTR rows become
	# [gene_ID, chr, strand, start, end]; exon rows are appended to
	# $exonhash{gene_ID} as "gene&&chr&&strand&&start&&end&&transcript||".
	if ( defined $gtf and $gtf == 1){
		my $loading_note = "] Loading annotation file\n";
		print MOD 	'[', scalar(localtime), $loading_note;
		print 		'[', scalar(localtime), $loading_note;

		my $bad_gtf = "CIRI cannot understand $anno_gtf! Please refer to manual for details of required GTF formats.\n";
		open my $gtf_fh, "<", $anno_gtf or die "Cannot open the annotation file: $!";
		while (my $record = <$gtf_fh>) {
			chomp $record;
			my @line = split /\t/, $record;
			my $feature = $line[2];
			next unless defined $feature;
			my @locus = @line[0,6,3,4];	# chr, strand, start, end

			# A list assignment in boolean context counts the captures, so each
			# "or die" fires exactly when the attribute pattern did not match.
			if ( $feature eq 'gene' ) {
				my ($gene_ID) = $line[8] =~ /gene_id \"(\S+)\"/ or die $bad_gtf;
				push @anno_gene, [$gene_ID, @locus];
			}
			if ( $feature =~ /prime_utr/ ) {
				my ($gene_ID) = $line[8] =~ /gene_id \"(\S+)\"/ or die $bad_gtf;
				push @anno_utr, [$gene_ID, @locus];
			}
			if ( $feature =~ /exon/ ) {
				my ($gene_ID, undef, $transid) = $line[8] =~ /gene_id \"(\S+)\"(.+)transcript_id \"(\S+)\"/ or die $bad_gtf;
				$exonhash{$gene_ID} .= join("&&", $gene_ID, @locus, $transid)."||";
			}
		}
		close $gtf_fh;
	}
	
	
	### miRNA file 
	# Optional input: only validated when the option was given.
	if (defined($miRNA)) {
		if (!(-e $miRNA and -f $miRNA)) {
			push @die_reason, "MiRNA file $miRNA does not exist!\n";
		} elsif (!-r $miRNA) {
			push @die_reason, "MiRNA file $miRNA is not readable!\n";
		}
	}
	
	
	### Maximum of genomic range designated by user for detected circRNAs
	if (!defined($max_circle)) {
		$max_circle = 200_000;
	} elsif ($max_circle < 10_000) {
		push @die_reason, "Max span size on reference cannot be smaller than 10000!\n";
	}
	my $min_circle = 140;
	
	### Stringency designated by user
	# The default of 2 applies only when the option is absent, so a valid
	# user-supplied value is kept.
	# NOTE(review): no later step in this file reads $low_strigency - confirm
	# where the junction-read threshold is meant to be applied.
	if (!defined($low_strigency)) {
		$low_strigency = 2;
	} elsif ($low_strigency >= 10) {
		push @die_reason, "Sensitivity is too high to find circRNA.\n";
	}
	
	### A check on minimum mapping quality designated by user
	# Accepted range is 0..30; 10 is used when the option is absent.
	if (defined $min_mapq) {
		if ($min_mapq > 30) {
			push @die_reason, "Threshold for mappqing quality of each segment of junction reads cannot be larger than 30!\n";
		} elsif ($min_mapq < 0) {
			push @die_reason, "Threshold for mappqing quality of each segment of junction reads cannot be smaller than 0!\n";
		}
	} else {
		$min_mapq = 10;
	}
	
	### A check on mitochondrion and chloroplast designated by user
	$chrM = 'chrM' unless defined $chrM;
	$chrP = 'chrP' unless defined $chrP;
	
	### Thread designated by user
	# Single-threaded unless told otherwise; more than 32 threads is refused.
	if (!defined $thread) {
		$thread = 1;
	} elsif ($thread > 32) {
		push @die_reason, "Please do not request thread number more than 32, which would not increase more speed of CIRI.\n";
	}
	
	### Quit if fatal error found.
	# Every collected reason is printed before stopping; warnings are only
	# shown when the run can continue.
	if (@die_reason) {
		print @die_reason, "Fatal error. Aborted.\n";
		die;
	}
	print @warn_reason if @warn_reason;
	
	
	
	### 2. Raw dataset alignment
	my $first_aln_note = "] First alignment\n";
	print MOD 	'[', scalar(localtime), $first_aln_note;
	print 		'[', scalar(localtime), $first_aln_note;
	
	my ($bwasam1, $bwalog1) = map { $output_dir.$_ } ('bwa-aln1.sam', 'bwa-aln1.log');
	# The index is built the same way for single- and paired-end input; only
	# the list of read files handed to "bwa mem" differs.
	command_system(qq( bwa index -a bwtsw $ref_dir ));
	my $first_pass_reads = ($scalar == 1) ? $read : "$read1 $read2";
	command_system(qq( bwa mem -T 19 $ref_dir $first_pass_reads 1> $bwasam1 2> $bwalog1 ));
	
	
	
	### 3. Get candidate circRNAs
	my $ciri_rule = "-" x 40;
	print MOD 	'[', scalar(localtime), "] CIRI2 begins running\n";
	print 		'[', scalar(localtime), "] CIRI2 begins running\n";
	print MOD 	$ciri_rule, "\n";
	print 	 	$ciri_rule, "\n";
	
	my $CIRIout = $output_dir.'CIRI2.output';
	# The annotation switch is the only difference between the two invocations.
	my $anno_switch = defined($anno_gtf) ? "-A $anno_gtf " : "";
	command_system(qq( perl $dir/CIRI2.pl -I $bwasam1 -O $CIRIout -F $ref_dir ${anno_switch}-S $max_circle -U $min_mapq -M $chrM -T $thread -G $log ));
	
	print MOD 	$ciri_rule, "\n";
	print 	 	$ciri_rule, "\n";
	
	
	
	### 4. Second alignment
	### 4.1 getting the full length of candidate circRNA
	my $fullseq_note = "] Getting full length of candidate circRNA\n";
	print MOD 	'[', scalar(localtime), $fullseq_note;
	print 		'[', scalar(localtime), $fullseq_note;
	
	# @CIRIOUT keeps the first twelve columns of every CIRI2 record; the
	# candidate FASTA written here is the reference for the second alignment.
	my $can_wholeseq = $output_dir.'candidate-fullseq'; my (@CIRIOUT,$n);
	open my $ciri_fh, "<", $CIRIout or die "Cannot open the $CIRIout file: $!";
	open my $candidate_fh, ">", $can_wholeseq or die "CIRI cannot write $can_wholeseq: $!";
	while (my $ciri_row = <$ciri_fh>) {
		chomp $ciri_row;
		next if $ciri_row =~ /circRNA_ID/;	# header line
		my @line = split /\t/, $ciri_row;
		push @CIRIOUT, [ @line[0..11] ];
		my ($name,$chrom,$start,$end,$strand) = @line[0,1,2,3,10];
		my $seq = get_genome_seq_return(\%chr_seq,$chrom,$strand,$start,$end);
		print {$candidate_fh} ">$name\n";
		print {$candidate_fh} $seq,"\n";
	}
	close $ciri_fh;
	close $candidate_fh;

	### 4.2 bwa alignment
	my $second_aln_note = "] Second alignment\n";
	print MOD 	'[', scalar(localtime), $second_aln_note;
	print 		'[', scalar(localtime), $second_aln_note;
	
	my ($bwasam2, $bwalog2) = map { $output_dir.$_ } ('bwa-aln2.sam', 'bwa-aln2.log');
	# Index the candidate sequences, then map the same reads against them.
	command_system(qq( bwa index -a bwtsw $can_wholeseq ));
	my $second_pass_reads = ($scalar == 1) ? $read : "$read1 $read2";
	command_system(qq( bwa mem -T 19 $can_wholeseq $second_pass_reads 1> $bwasam2 2> $bwalog2 ));

	### 4.3 getting the output of second alignment
	# %hash_count: number of accepted reads per candidate (consecutive records of
	# the same read count once); %hash_PE3: the accepted read names, each
	# followed by "||".
	my %hash_count=(); my %hash_PE3=();
	my $old_reads_name=""; my @real_circrna1;
	
	open BWASAM2, "<", $bwasam2 or die "Cannot open the $bwasam2 file: $!";
	while (my $sam_line = <BWASAM2>) {
		chomp $sam_line;
		# Header lines ("@HD", "@SQ", "@PG", ...) carry no alignment fields.
		next if $sam_line =~ /^\@/;
		my ($reads_name,$circRNA_name,$map_pos,$mapping_quality)=(split(/\t/,$sam_line,12))[0,2,3,4];
		# Truncated records would otherwise trigger uninitialized-value warnings.
		next unless defined $mapping_quality;
		# Candidate names have the form "<chr>:<start>|<end>".
		my (undef, $circ_start, $circ_end) = $circRNA_name =~ /(\w+)\:(\d+)\|(\d+)/ or next;
		next unless $mapping_quality >= $min_mapq;
		my $length = $circ_end-$circ_start+1;
		# Accept reads anchored at the first base of the candidate, or at a
		# position satisfying the distance test below.
		# NOTE(review): "$map_pos+$map_pos" looks like it was meant to be
		# position + read length - confirm before changing.
		if ($map_pos == 1 || ($map_pos+$map_pos >= $length && $length-$map_pos>=20)){
			$hash_PE3{$circRNA_name}.=$reads_name."||";
			if($old_reads_name ne $reads_name){
				$hash_count{$circRNA_name}++;
			}
			$old_reads_name=$reads_name;
		}
	}
	close BWASAM2;
	
	# Collect into @real_circrna1 the candidates that received support in the
	# second alignment, in sorted name order.
	# NOTE(review): as written every key of %hash_count passes - counts start
	# at 1 so ">=0" always holds, the grep condition ">= 1" is true for every
	# element (no de-duplication happens), and each key has at least one read
	# name. The intended thresholds should be confirmed with the authors.
	foreach my $key (sort keys %hash_count){
		if($hash_count{$key}>=0){
			# Drop the trailing "||" separator before splitting the read names.
			$hash_PE3{$key}=~s/\|\|$//;
			my @listPE3 = split(/\|\|/,$hash_PE3{$key});
			my %listPE3count;
			@listPE3 = grep { ++$listPE3count{ $_ } >= 1; } @listPE3;
			if (@listPE3 >= 1){
				push (@real_circrna1, $key);
			}
		}
	}
	
	
	
	### 6. Output
	### 6.1 circRNA type
	my $filter_note = "] Filter\n";
	print MOD 	'[', scalar(localtime), $filter_note;
	print 		'[', scalar(localtime), $filter_note;
	
	# Result files: isoform sequences (FASTA) and the circRNA table.
	my ($isoforms, $CIRPout1) = map { $output_dir.$_ } ('isoforms.circ', 'circRNA.circ');
	if (defined($anno_gtf)){
		# Group genes by chromosome (names are strings such as "Chr1", so they are
		# compared with cmp) and order them by start/end inside each chromosome,
		# so that $anno_gene[$i+1] is the next gene in coordinate order.
		@anno_gene=sort {$a->[1] cmp $b->[1] || $a->[3] <=> $b->[3] || $a->[4] <=> $b->[4]} @anno_gene;
		# @rna_all / @rna_intergenic hold circRNA IDs; @rna_gene, @rna_utr and
		# @rna_exon hold [circRNA_ID, gene_ID] pairs; @exonarr holds one string
		# "circRNA_ID,chr,strand,exon_N(start-end),..." per circRNA/transcript pair.
		my (@rna_all, @rna_gene, @rna_utr, @rna_intergenic, @rna_exon,  @exonarr);
		my $old_trans_name=""; my $pending_isoform;

		# Only candidates confirmed by the second alignment are classified.
		my %is_confirmed = map { $_ => 1 } @real_circrna1;

		foreach my $circ (@CIRIOUT) {
			my ($circ_id, $circ_chr, $circ_start, $circ_end, $circ_strand) = @{$circ}[0,1,2,3,10];
			next unless $is_confirmed{$circ_id};

			# An isoform record never spans two circRNAs: store what was collected
			# for the previous one and start afresh.
			push @exonarr, $pending_isoform if defined $pending_isoform;
			$pending_isoform = undef;
			$old_trans_name = "";

			my $cangeneid = ""; my $exon_no = 0;
			push (@rna_all, $circ_id);

			foreach my $i (0..$#anno_gene) {
				my $gene = $anno_gene[$i];
				next unless $circ_chr eq $gene->[1] && $circ_strand eq $gene->[2];
				# Genic: the circRNA lies completely inside the gene.
				if ($circ_start >= $gene->[3] && $circ_end <= $gene->[4]) {
					push (@rna_gene, [$circ_id, $gene->[0]]);
					$cangeneid = $gene->[0];
				}
				# Intergenic: the circRNA lies between this gene and the next entry
				# of the sorted list. The index guard keeps the last gene from
				# creating an empty element past the end of @anno_gene.
				# NOTE(review): the next entry is not checked for chromosome or
				# strand - confirm whether that is intended.
				if ($i < $#anno_gene && $circ_start > $gene->[4] && $circ_end < $anno_gene[$i+1][3]) {
					push (@rna_intergenic, $circ_id);
				}
			}
			foreach my $utr (@anno_utr) {
				if ($circ_chr eq $utr->[1] && $circ_strand eq $utr->[2] && $circ_start >= $utr->[3] && $circ_end <= $utr->[4]) {
					push (@rna_utr, [$circ_id, $utr->[0]]);
				}
			}

			# Exon overlap is only evaluated against the (last) enclosing gene.
			next if $cangeneid eq "";
			next unless defined $exonhash{$cangeneid};
			$exonhash{$cangeneid}=~s/\|\|$//;
			foreach my $exon_record (split(/\|\|/,$exonhash{$cangeneid})) {
				# gene, chr, strand, start, end, transcript
				my @list2 = split(/&&/,$exon_record);
				$exon_no++;	# running exon number within the gene
				next unless $circ_chr eq $list2[1] && $circ_strand eq $list2[2];
				my $overlaps =
					   ($circ_start >= $list2[3] && $circ_start <= $list2[4])
					|| ($circ_end   >= $list2[3] && $circ_end   <= $list2[4])
					|| ($circ_start >= $list2[3] && $circ_end   <= $list2[4])
					|| ($circ_start <= $list2[3] && $circ_end   >= $list2[4]);
				next unless $overlaps;

				push (@rna_exon, [$circ_id, $list2[0]]);
				my $exon_tag = ",exon_".$exon_no."(".$list2[3]."-".$list2[4].")";
				if ($old_trans_name ne $list2[5]) {
					# A new transcript starts a new isoform record.
					push @exonarr, $pending_isoform if defined $pending_isoform;
					$pending_isoform = $circ_id.",".$list2[1].",".$list2[2].$exon_tag;
				} else {
					$pending_isoform .= $exon_tag;
				}
				$old_trans_name = $list2[5];
			}
		}
		# Store the record that was still being built when the input ran out.
		push @exonarr, $pending_isoform if defined $pending_isoform;
	
		# Make the classes mutually exclusive. Order matters: each step uses the
		# lists produced by the previous ones.
		# Intergenic: drop anything that also lies inside a gene.
		@rna_intergenic = set_A_minus_set_B2(\@rna_intergenic,\@rna_gene);

		# Other: neither genic nor intergenic.
		my @rna_non_genic = set_A_minus_set_B2(\@rna_all,\@rna_gene);
		my @rna_other = set_A_minus_set_B1(\@rna_non_genic,\@rna_intergenic);

		# UTR: UTR hits without exon overlap, merged to one row per circRNA.
		@rna_utr = set_A_minus_set_B3(\@rna_utr,\@rna_exon);
		@rna_utr = filter_array(\@rna_utr,\@rna_utr);

		# Intronic: genic hits that are neither exonic nor UTR.
		my @rna_genic_non_exon = set_A_minus_set_B3(\@rna_gene,\@rna_exon);
		my @rna_intron = set_A_minus_set_B3(\@rna_genic_non_exon,\@rna_utr);
		@rna_intron = filter_array(\@rna_intron,\@rna_intron);

		# Exonic: merged to one row per circRNA.
		@rna_exon = filter_array(\@rna_exon,\@rna_exon);
	
		my $totalrna = scalar @rna_all;
		my $exonicrna = scalar @rna_exon;
		my $summary_note = "] $totalrna were identified, including $exonicrna exonic circRNAs\n";
		print MOD 	'[', scalar(localtime), $summary_note;
		print 		'[', scalar(localtime), $summary_note;
	
		### 6.2 circRNA output
		print MOD 	'[', scalar(localtime), "] CircRNA information output\n";
		print 		'[', scalar(localtime), "] CircRNA information output\n";
	
		open my $circ_table_fh, ">", $CIRPout1 or die "CIRI cannot write $CIRPout1: $!";
		print {$circ_table_fh} join("\t", qw(circRNA_ID chr circRNA_strand circRNA_start circRNA_end circRNA_type gene_id junction_reads junction_reads_ID)), "\n";

		# Classes that carry [circRNA_ID, gene_id] pairs, then classes that carry
		# bare IDs; rows are written per circRNA in exactly this order.
		my @paired_classes = ( ['exonic', \@rna_exon], ['intronic', \@rna_intron], ['UTR', \@rna_utr] );
		my @plain_classes  = ( ['intergenic', \@rna_intergenic], ['other', \@rna_other] );
		foreach my $circ (@CIRIOUT) {
			my @lead = @{$circ}[0,1,10,2,3];	# ID, chr, strand, start, end
			my @tail = @{$circ}[4,11];			# junction read count and read IDs
			foreach my $class (@paired_classes) {
				my ($label, $members) = @$class;
				foreach my $pair (@$members) {
					next unless $circ->[0] eq $pair->[0];
					print {$circ_table_fh} join("\t", @lead, $label, $pair->[1], @tail), "\n";
				}
			}
			foreach my $class (@plain_classes) {
				my ($label, $members) = @$class;
				foreach my $member_id (@$members) {
					next unless $circ->[0] eq $member_id;
					print {$circ_table_fh} join("\t", @lead, $label, 'n/a', @tail), "\n";
				}
			}
		}
		close $circ_table_fh;
	
		### 6.3 circRNA isoforms
		print MOD 	'[', scalar(localtime), "] Getting circRNA isoforms\n";
		print 		'[', scalar(localtime), "] Getting circRNA isoforms\n";
	
		open my $circ_in_fh, "<", $CIRPout1 or die "Cannot open the $CIRPout1 file: $!";
		open my $isoform_fh, ">", $isoforms or die "CIRI cannot write $isoforms: $!";
	
		# Exonic circRNAs: one FASTA record per @exonarr entry
		# ("ID,chr,strand,exon_N(start-end),..."), built by concatenating the
		# listed exon sequences. Undefined or empty entries are skipped.
		foreach my $isoform_record (@exonarr) {
			next unless defined $isoform_record and $isoform_record ne "";
			my ($name, $chrom, $strand, @exon_tags) = split (/,/, $isoform_record);
			my $seq = "";
			foreach my $exon_tag (@exon_tags) {
				if ($exon_tag =~ /\((\w+)-(\w+)\)/) {
					my ($start, $end) = ($1, $2);
					$seq .= get_genome_seq_return(\%chr_seq,$chrom,$strand,$start,$end);
				}
			}
			print {$isoform_fh} ">$name\n";
			print {$isoform_fh} $seq,"\n";
		}
		# Every other class: the full start-end span, read back from the table
		# (the header row and exonic rows are skipped).
		while (my $table_row = <$circ_in_fh>) {
			chomp $table_row;
			my @line = split /\t/, $table_row;
			next if $line[5] =~ /exonic/ or $line[5] =~ /circRNA_type/;
			my ($name,$chrom,$strand,$start,$end) = @line[0..4];
			my $seq = get_genome_seq_return(\%chr_seq,$chrom,$strand,$start,$end);
			print {$isoform_fh} ">$name\n";
			print {$isoform_fh} $seq,"\n";
		}
	
		close $circ_in_fh;
		close $isoform_fh;
	
	} else {
		### 6.2 circRNA output
		my $totalrna = scalar @real_circrna1;

		print MOD 	'[', scalar(localtime), "] $totalrna were identified\n";
		print 		'[', scalar(localtime), "] $totalrna were identified\n";
		print MOD 	'[', scalar(localtime), "] CircRNA information output\n";
		print 		'[', scalar(localtime), "] CircRNA information output\n";
		
		open my $plain_table_fh, ">", $CIRPout1 or die "CIRI cannot write $CIRPout1: $!";
		print {$plain_table_fh} join("\t", qw(circRNA_ID chr circRNA_strand circRNA_start circRNA_end junction_reads junction_reads_ID)), "\n";
		
		# Without annotation the table lists every CIRI2 record whose ID was
		# confirmed by the second alignment.
		foreach my $circ (@CIRIOUT) {
			foreach my $confirmed_id (@real_circrna1) {
				next unless $confirmed_id eq $circ->[0];
				print {$plain_table_fh} join("\t", @{$circ}[0,1,10,2,3,4,11]), "\n";
			}
		}
		close $plain_table_fh;
		
		### 6.3 circRNA isoforms
		my $isoform_note = "] Getting circRNA isoforms\n";
		print MOD 	'[', scalar(localtime), $isoform_note;
		print 		'[', scalar(localtime), $isoform_note;
		
		# Each table row (header excluded) becomes one FASTA record spanning the
		# full start-end interval of the circRNA.
		open my $plain_in_fh, "<", $CIRPout1 or die "Cannot open the $CIRPout1 file: $!";
		open my $plain_isoform_fh, ">", $isoforms or die "CIRI cannot write $isoforms: $!";
		while (my $table_row = <$plain_in_fh>) {
			chomp $table_row;
			my @line = split /\t/, $table_row;
			next if $line[5] =~ /circRNA_type/ or $line[5] =~ /junction_reads/;
			my ($name,$chrom,$strand,$start,$end) = @line[0..4];
			my $seq = get_genome_seq_return(\%chr_seq,$chrom,$strand,$start,$end);
			print {$plain_isoform_fh} ">$name\n";
			print {$plain_isoform_fh} $seq,"\n";
		}
		close $plain_in_fh;
		close $plain_isoform_fh;
		
	}

	
	
	### 5. circRNA-miRNA interaction
	if (defined($miRNA)){
	
		print MOD 	'[', scalar(localtime), "] Targetfinder begins running\n";
		print 		'[', scalar(localtime), "] Targetfinder begins running\n";
	
		# Load the miRNA FASTA into %mirna: header text after ">" => sequence.
		# NOTE(review): the whole header line is used as the name and later placed
		# unquoted on a command line - headers containing spaces or shell
		# metacharacters should be checked against what targetfinder.pl expects.
		open MIRNA, "<", $miRNA or die "Cannot open the $miRNA file: $!";
		my (%mirna, $previous);
		while (my $line = <MIRNA>){
			chomp $line;
			$line =~ s/\r\z//;	# tolerate CRLF line endings
			next if ($line =~ /^\s*$/);
			if (substr($line,0,1) eq '>') {
				$previous = substr($line,1);
				$mirna{$previous} = '';
			} elsif (defined $previous) {
				# Sequence text before the first header belongs to no record.
				$mirna{$previous} .= $line;
			}
		}
		close MIRNA;
	
		my $miRNA_mRNA = $output_dir.'targetfinder.output';
		my @miRNAmRNA; my $CIRPout2 = $output_dir.'circRNA-miRNA.circ';
		
		# Start from an empty result file: this discards output of a previous run
		# and guarantees the file exists even when no miRNA was loaded.
		open my $reset_fh, ">", $miRNA_mRNA or die "Cannot write the $miRNA_mRNA file: $!";
		close $reset_fh;

		# One targetfinder run per miRNA, appended to the shared result file.
		# Names are processed in sorted order so repeated runs give the same file.
		foreach my $name (sort keys %mirna) {
			my $seq = $mirna{$name};
			command_system(qq( perl $dir/targetfinder.pl -s $seq -d $isoforms -q $name >> $miRNA_mRNA));
		}
		
		# Collect [target circRNA, miRNA] pairs from the targetfinder report and
		# merge them to one row per circRNA.
		open my $targets_fh, "<", $miRNA_mRNA or die "Cannot open the $miRNA_mRNA file: $!";
		while (my $report_line = <$targets_fh>) {
			chomp $report_line;
			if ($report_line =~ /query=(\S+), target=(\S+), score/){
				push (@miRNAmRNA, [$2,$1]);
			}
		}
		close $targets_fh;
		@miRNAmRNA = filter_array(\@miRNAmRNA,\@miRNAmRNA);
		
		
		open my $annot_in_fh, "<", $CIRPout1 or die "Cannot open the $CIRPout1 file: $!";
		open my $mirna_out_fh, ">", $CIRPout2 or die "Cannot open the $CIRPout2 file: $!";
		
		# The circRNA table has two extra leading columns (type, gene) when an
		# annotation was used; the miRNA column is inserted right after the
		# leading columns, before the two junction-read columns.
		my @lead_columns = qw(circRNA_ID chr circRNA_strand circRNA_start circRNA_end);
		push @lead_columns, qw(circRNA_type gene_id) if defined($anno_gtf);
		my $last_lead = $#lead_columns;
		print {$mirna_out_fh} join("\t", @lead_columns, qw(miRNA junction_reads junction_reads_ID)), "\n";

		while (my $table_row = <$annot_in_fh>) {
			chomp $table_row;
			my @line = split /\t/, $table_row;
			foreach my $hit (@miRNAmRNA) {
				next unless $line[0] eq $hit->[0];
				# Merged miRNA lists may start with a separator comma; strip it once.
				if ($hit->[1] =~ /^,(.*)/) {
					$hit->[1] = $1;
				}
				print {$mirna_out_fh} join("\t", @line[0..$last_lead], $hit->[1], @line[$last_lead+1, $last_lead+2]), "\n";
			}
		}
		close $annot_in_fh;
		close $mirna_out_fh;
	}
	
	
	# Closing message for the run log and the terminal.
	my $farewell = "] CircPlant finished its work. Please see output directory $output_dir for detail.\n";
	print MOD 	'[', scalar(localtime), $farewell;
	print 		'[', scalar(localtime), $farewell;
	print MOD 	"-" x 80, "\n";
	
	### The subroutines
	# Run a command line through system().
	#   $command   - the command string
	#   $bool_exec - when true the command is NOT executed (dry run)
	# Returns the raw status from system() (0 on success, also 0 for a dry run).
	# A failure is reported with warn so it is not silently ignored; the
	# pipeline is not aborted.
	sub command_system
	{
		my ($command,$bool_exec)=@_;
		#print $command,"\n";
		return 0 if $bool_exec;
		my $status = system($command);
		if ($status == -1) {
			warn "Failed to execute command ($!): $command\n";
		} elsif ($status & 127) {
			warn "Command died with signal ".($status & 127).": $command\n";
		} elsif ($status != 0) {
			warn "Command exited with status ".($status >> 8).": $command\n";
		}
		return $status;
	}
	
	# Cut the region $start..$end out of a reference sequence.
	#   $genome_hash - hash ref: sequence ID => sequence string
	#   $id          - sequence (chromosome) ID
	#   $strand      - "+" returns the region as is; anything else returns its
	#                  reverse complement
	#   $start,$end  - 1-based, inclusive coordinates (as in GFF/GTF and SAM)
	# Returns the extracted sequence.
	sub get_genome_seq_return{
		my($genome_hash,$id,$strand,$start,$end)=@_;
		# substr() counts from 0, the coordinates count from 1; a start below 1
		# is clamped so it cannot wrap around to the end of the string.
		my $offset = $start > 0 ? $start-1 : 0;
		my $fragment = substr($$genome_hash{$id},$offset,$end-$start+1);
		if($strand eq "+"){
			return $fragment;
		}
		else{
			return &reverse_seq($fragment);
		}
	}

	# Return the reverse complement of a DNA sequence. Case is preserved.
	# Besides A/C/G/T the IUPAC ambiguity codes are complemented as well
	# (R<->Y, K<->M, B<->V, D<->H); S, W and N are their own complement and
	# any other character is left unchanged.
	sub reverse_seq{
		my ($reverse_seq)=@_;
		$reverse_seq= reverse $reverse_seq;
		$reverse_seq=~tr/ACGTRYKMBDHVacgtrykmbdhv/TGCAYRMKVHDBtgcayrmkvhdb/;
		return $reverse_seq;
	}
	
	
	# Set difference on two lists of plain strings.
	#   $listA, $listB - array refs of scalars
	# Returns the elements of @$listA that do not occur in @$listB, in their
	# original order (duplicates in A are kept).
	sub  set_A_minus_set_B1
	{
		my ($listA,$listB)=@_;
		my %excluded;
		$excluded{$_} = 1 foreach @$listB;
		my @kept;
		foreach my $candidate (@$listA) {
			push @kept, $candidate unless $excluded{$candidate};
		}
		return @kept;
	}

	# Set difference between a list of IDs and a list of records.
	#   $listA - array ref of scalars (IDs)
	#   $listB - array ref of array refs; element [0] of each is an ID
	# Returns the IDs of @$listA that are not the first field of any record
	# in @$listB, in their original order.
	sub  set_A_minus_set_B2
	{
		my ($listA,$listB)=@_;
		my %excluded;
		foreach my $record (@$listB) {
			$excluded{ $record->[0] } = 1;
		}
		return grep { !$excluded{$_} } @$listA;
	}

	# Set difference between two lists of records, compared by their first field.
	#   $listA, $listB - array refs of array refs ([ID, value])
	# Returns a fresh [ID, value] pair for every record of @$listA whose ID does
	# not occur as an ID in @$listB, in original order. Each input record yields
	# at most one output record (a single pass, so IDs that occur several times
	# in A are not multiplied). Records with a false ID (undef, "" or "0") are
	# never returned.
	sub  set_A_minus_set_B3
	{
		my ($listA,$listB)=@_;
		my %in_B = map { $_->[0] => 1 } @$listB;
		my @list;
		foreach my $record (@$listA) {
			my $id = $record->[0];
			next unless $id;
			next if $in_B{$id};
			push (@list, [$id, $record->[1]]);
		}
		return @list;
	}

	# Merge [key, value] records that share a key.
	#   $listA - array ref of [key, value] array refs; a value may itself be a
	#            comma-separated list
	#   $listB - unused, kept for compatibility with existing callers
	# Returns one [key, merged] record per distinct key, in order of first
	# appearance. "merged" is the comma-separated list of the distinct values
	# seen for that key (first occurrence order, no leading comma).
	sub filter_array
	{
		my ($listA,$listB) = @_;
		my (%values_for, @key_order);
		foreach my $record (@$listA) {
			my ($key, $value) = @$record;
			if (!exists $values_for{$key}) {
				$values_for{$key} = [];
				push @key_order, $key;
			}
			push @{ $values_for{$key} }, split(/,/, $value) if defined $value;
		}
		my @list;
		foreach my $key (@key_order) {
			my %seen;
			my @distinct = grep { !$seen{$_}++ } @{ $values_for{$key} };
			push (@list, [$key, join(",", @distinct)]);
		}
		return @list;
	}
	
}