Bash Commands

# Activate conda env 'bioinfo'
conda activate bioinfo

# View text of a file
more <filename>

# Download a file
curl <url> --output <filename>

# Print the number of lines, words, and bytes (use wc -m to count characters instead)
cat <filename> | wc

# Print the first part of a file
cat <filename> | head

# Find a certain phrase
cat <filename> | grep <phrase>

# Count how many lines DON'T match (grep -v inverts the match, wc -l counts lines)
cat <filename> | grep -v <phrase> | wc -l

# Store the results in a new file
cat <filename> | grep YAL060W > match.tab

# Slice each line and extract text (-f selects fields, which are tab-separated by default; 2 is the 2nd field)
cat <filename> | cut -f 2 | head

# Count the number of items matching 'ORF'
cat <filename> | cut -f 2 | grep ORF | wc -l

# Print multiple fields (2, 3 and 4) and keep only the lines matching 'ORF'
cat <filename> | cut -f 2,3,4 | grep ORF | head

# sort places identical entries next to one another
cat <filename> | sort | head

# sort -rn sorts numerically (-n) in descending order (-r), so the highest counts come first
cat types.txt | sort | uniq -c | sort -rn | head

# uniq collapses consecutive identical lines into one (which is why the input is sorted first)
cat <filename> | sort | uniq | head

# uniq -c also prints their count
cat <filename> | sort | uniq -c | head

# There is one fundamental flaw with the way the sort | uniq -c | sort -rn pattern works. 
# The output is space delimited and padded. The spaces and padding make operating on the 
# results surprisingly challenging.
# The good news is that the pattern is so common and useful that the entrez-direct 
# package offers two commands that streamline the “sort-uniq-sort” pattern. 
# The commands are called:

sort-uniq-count # performs <sort | uniq -c | sort -rn> and outputs tab delimited
# and
sort-uniq-count-rank

# You can print the names of programs like sort-uniq-count-rank by 
# typing sort then hitting <tab> twice

# To create a makefile
touch makefile

# Edit a file
nano <filename>
# OR open the current directory in VS Code
code .