# !mkdir /local/shared/pinsky_lab/sequencing/hiseq_2014_08_07_SEQ03/
%%bash
# Create the SEQ03 working tree under ~/02-apcl-ddocent/03seq:
#   bcsplit/lane1, bcsplit/lane2 and one directory per pool (01Pool..04Pool).
# Strict mode: stop at the first failure so that a failed cd can never cause
# the directories to be created somewhere else.
set -euo pipefail
cd ~/02-apcl-ddocent
# mkdir 03seq
cd ./03seq/
# mkdir logs samples scripts
# -p creates bcsplit together with its lane directories and makes the cell
# safe to re-run when some of the directories already exist.
mkdir -p bcsplit/lane1 bcsplit/lane2 01Pool 02Pool 03Pool 04Pool
%%bash
# Enter the project root; the barcode copy below is kept disabled.
cd -- ~/02-apcl-ddocent
# cp 16seq/logs/barcodes 03seq/logs/
Started at 4:12pm on Monday; it was still running at 11pm, and I am not sure whether it finished. Retried through a terminal at 8:42am on Tuesday - done by 10am on Wednesday (didn't check sooner).
# Run barcode_splitter.py once per sequencing lane, in the background, from
# inside that lane's directory under bcsplit/.
# Inputs per lane: the read 1 file and the read 2 (index read) file of the
# SEQ03 HiSeq run; barcodes come from 03seq/logs/index-seq03.
seq_dir=/local/shared/pinsky_lab/sequencing/hiseq_2014_08_07_SEQ03
run_prefix=clownfish-ddradseq-seq03-for-222-cycles-ha1wgadxx
splitter=/local/home/michelles/14_programs/paired_sequence_utils/barcode_splitter.py

for lane in 1 2; do
  # The whole "cd && nohup" list is backgrounded as one unit: if the cd
  # fails, the splitter is never started, so a job cannot end up running in
  # the other lane's directory. The calling shell's working directory is
  # left unchanged.
  cd ~/02-apcl-ddocent/03seq/bcsplit/lane"${lane}" &&
    nohup "${splitter}" --bcfile ../../logs/index-seq03 --idxread 2 --suffix .fastq.gz \
      "${seq_dir}/${run_prefix}_${lane}_read_1_passed_filter.fastq.gz" \
      "${seq_dir}/${run_prefix}_${lane}_read_2_index_read_passed_filter.fastq.gz" &
done
# Show the first lines of the nohup.out left in the lane 1 directory
# (presumably written by the barcode_splitter.py run started there), then
# move it into 03seq/logs as lane1.out.
!head ~/02-apcl-ddocent/03seq/bcsplit/lane1/nohup.out
!mv ~/02-apcl-ddocent/03seq/bcsplit/lane1/nohup.out ~/02-apcl-ddocent/03seq/logs/lane1.out
# Same two steps for lane 2; the file is stored as lane2.out.
!head ~/02-apcl-ddocent/03seq/bcsplit/lane2/nohup.out
!mv ~/02-apcl-ddocent/03seq/bcsplit/lane2/nohup.out ~/02-apcl-ddocent/03seq/logs/lane2.out
For large batches, I generated the command lines with a script in the seq_proc.Rmd notebook and wrote them to files called SEQXX_cat_all.sh, to be copied and pasted here.
%%bash
# Join the lane 1 and lane 2 read-1 files of each pool into a single
# <pool>.fastq.gz inside that pool's directory (P012 -> 01Pool, P013 -> 02Pool,
# P014 -> 03Pool, P015 -> 04Pool). The .gz inputs are joined as-is with cat.
# Strict mode: without it a failed cd or a missing lane file would leave an
# empty or single-lane pool file behind while the cell still reported success,
# because only the status of the last command counted.
set -euo pipefail
cd ~/02-apcl-ddocent/03seq/bcsplit/
cat ./lane1/P012-read-1.fastq.gz ./lane2/P012-read-1.fastq.gz > ../01Pool/P012.fastq.gz
cat ./lane1/P013-read-1.fastq.gz ./lane2/P013-read-1.fastq.gz > ../02Pool/P013.fastq.gz
cat ./lane1/P014-read-1.fastq.gz ./lane2/P014-read-1.fastq.gz > ../03Pool/P014.fastq.gz
cat ./lane1/P015-read-1.fastq.gz ./lane2/P015-read-1.fastq.gz > ../04Pool/P015.fastq.gz
If this notebook closes, these processes stop. I ended up pasting this into a terminal window because I was afraid the network would drop and the processes would stop.
Started at 1:24pm on Wednesday
%%bash
# Start each pool's processing script (03seq/scripts/NNprocess.sh) in the
# background, from inside that pool's directory.
# Each entry is <pool directory>:<script number>.
cd ~/02-apcl-ddocent/03seq/ || exit 1
for entry in 01Pool:12 02Pool:13 03Pool:14 04Pool:15; do
  pool_dir=${entry%%:*}
  script_id=${entry##*:}
  # "cd && nohup" is backgrounded as a single unit, so a script is only
  # started after the cd into its own pool directory succeeded; a failed cd
  # can no longer make a script run inside another pool's directory.
  cd "${pool_dir}/" && nohup "../scripts/${script_id}process.sh" &
done
The script already did this for us, so we don't have to.
#%%bash
#mv 01Pool/process_radtags.log ./logs/processP012.log
#mv 02Pool/process_radtags.log ./logs/processP013.log
#mv 03Pool/process_radtags.log ./logs/processP014.log
#mv 04Pool/process_radtags.log ./logs/processP015.log
%%bash
# Run readprocesslog.py on every pool's process log (03seq/logs/NNprocess.out),
# each time from inside the matching pool directory.
# Each entry is <pool directory>:<log number>.
# Every pool is attempted even if an earlier one fails; the cell exits
# non-zero if any cd or any readprocesslog.py call failed, instead of
# reporting only the status of the last pool.
status=0
for entry in 01Pool:12 02Pool:13 03Pool:14 04Pool:15; do
  pool_dir=${entry%%:*}
  log_id=${entry##*:}
  if ! cd ~/02-apcl-ddocent/03seq/"${pool_dir}"/; then
    # Skip this pool rather than running the reader in the previous directory.
    status=1
    continue
  fi
  readprocesslog.py "../logs/${log_id}process.out" || status=1
done
exit "${status}"
%%bash
# For each pool: run the rename.for.dDocent_se_gz script found in the pool
# directory with that pool's names_NNN.tsv, then move the resulting APCL*
# files into 03seq/samples/.
# Each entry is <pool directory>:<names file number>.
# Strict mode: the pool directories are deleted later in this notebook, so a
# failed cd, rename or move must stop the cell instead of passing unnoticed
# (previously a failed cd would repeat the rename/move in the previous pool).
set -euo pipefail
for entry in 01Pool:012 02Pool:013 03Pool:014 04Pool:015; do
  pool_dir=${entry%%:*}
  names_id=${entry##*:}
  cd ~/02-apcl-ddocent/03seq/"${pool_dir}"/
  sh rename.for.dDocent_se_gz "../logs/names_${names_id}.tsv"
  # If nothing matches APCL*, mv receives the literal pattern, fails, and the
  # cell aborts here.
  mv -- APCL* ../samples/
done
# Print the final working directory (the last pool directory).
pwd
# Show the first lines of names_012.tsv from the 03seq logs directory.
!head /local/home/michelles/02-apcl-ddocent/03seq/logs/names_012.tsv
# Recursively delete everything under 03seq whose name matches *Pool.
# This cannot be undone - make sure the sample files were moved out first.
!rm -r ~/02-apcl-ddocent/03seq/*Pool
All samples from SEQ03 were concatenated (with cat) when we concatenated SEQ28.
# Copy reference.fasta from jonsfiles into the SEQ03 samples directory.
# Run through the `!` shell escape, like the other shell one-liners in this
# notebook.
!cp /local/home/michelles/02-apcl-ddocent/jonsfiles/reference.fasta /local/home/michelles/02-apcl-ddocent/03seq/samples
For this phase, dDocent will ask questions. The answers used are listed below.
yes
This depends on how many people are trying to use amphiprion right now. The trim and map section isn't too intensive, so it should be OK to use a lot. I used 30 for SEQ18 (576 samples).
30
Again, consider who else is using the machine; for SEQ18 (576 samples) I used 150.
150
yes
no - this is for creating the reference originally
yes
The default match score is 1, the default mismatch score is 4, and the default gap penalty is 6 - I used the defaults for all.
yes
# Start dDocent through the shell escape; the notes above record the answers
# given to its prompts.
!dDocent