JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
coffeescript to filter by huge wordlist
[four-random-words.git] / word-frequency-here.bash
1 #!/bin/bash
2
3 find . -type f -print0 \
4         | xargs -0 grep -I --no-filename -v '(notice)' \
5         | sed -e 's/<[^>]*>//g; s/&nbsp;/ /g; s/&[nm]dash;/-/' \
6         | sed -ne "s/^(..:..:.. ..) [a-zA-Z0-9@ _.-]\{1,25\}: \(.*\)$/\1/p" \
7         | perl -0777 -p -e's/\s+/\n/mg' \
8         | grep -v '^&.*;$' \
9         | sed -e 's/^\W\+//g; s/\W\+$//g' \
10         | grep '^[a-z.'\''-]\{3,16\}$' \
11         | sort \
12         | uniq -c \
13         | sort -nr