#!/bin/bash
#
# word-frequency-here.bash - word-frequency table for a tree of files.
#
# Provenance (from the patch this script was extracted from):
#   From:    Jason Woofenden
#   Date:    Thu, 8 Nov 2012 07:34:59 +0000 (-0500)
#   Subject: script to find word-frequency of tree of files
#   Commit:  125e938ec9e8effca3345cc446fd4b80a825c63c (four-random-words.git)
#   URL:     https://jasonwoof.com/gitweb/?p=four-random-words.git;a=commitdiff_plain;h=125e938ec9e8effca3345cc446fd4b80a825c63c
#
# Usage: cd into the directory to analyse, then run this script (arguments are
# ignored).  Every non-binary file below the current directory is scanned for
# lines shaped like
#
#     (HH:MM:SS xx) nick: message text
#
# (optionally wrapped in HTML tags).  The words of each message are counted and
# printed as "count word" lines, most frequent first.
# NOTE(review): the line shape looks like an instant-messenger log format -
# confirm which client produced the logs this was written for.
#
# No `set -e` / `pipefail` on purpose: the grep stages exit 1 whenever they
# select nothing, which is a normal outcome here (e.g. an empty tree).

set -u

# U+00A0 (no-break space) encoded as UTF-8.
readonly NBSP=$'\xc2\xa0'

# A countable word: 3 to 16 characters drawn from lower-case ASCII letters,
# dot, apostrophe and hyphen.  Anything else (including capitalised words) is
# discarded.
readonly WORD_RE="^[a-z.'-]\{3,16\}\$"

#######################################
# Print the lines of every regular, non-binary file under the current
# directory, omitting lines that contain the literal text "(notice)".
# Outputs: raw lines on stdout, without file-name prefixes.
#######################################
read_log_lines() {
  # `-exec ... {} +` batches file names like xargs does, copes with any file
  # name, and never runs grep without file operands when the tree is empty.
  find . -type f -exec grep -I -h -v -- '(notice)' {} +
}

#######################################
# Turn raw log lines (stdin) into one candidate word per line (stdout).
#######################################
extract_words() {
  # 1. Strip HTML tags; turn &nbsp; (and a literal U+00A0) into a space and
  #    every &ndash;/&mdash; into "-".
  #    NOTE(review): the copy of this script that was reviewed had a no-op
  #    `s/ / /g` where the &nbsp; rule is; the entity appears to have been
  #    decoded in transit, so both spellings are handled.
  # 2. Keep only "(..:..:.. ..) nick: message" lines and reduce them to the
  #    message.  In a basic regex the unescaped parentheses are literal; the
  #    escaped pair is the capture group.
  # 3. Put each whitespace-separated token on its own line.
  # 4. Drop tokens that are nothing but an HTML entity (e.g. "&amp;").
  # 5. Trim leading/trailing non-word characters (POSIX spelling of \W).
  # 6. Keep only tokens that look like plain lower-case words.
  sed -n \
    -e 's/<[^>]*>//g' \
    -e 's/&nbsp;/ /g' \
    -e "s/${NBSP}/ /g" \
    -e 's/&[nm]dash;/-/g' \
    -e 's/^(..:..:.. ..) [a-zA-Z0-9@ _.-]\{1,25\}: \(.*\)$/\1/p' \
    | tr -s '[:space:]' '\n' \
    | grep -v '^&.*;$' \
    | sed -e 's/^[^[:alnum:]_]*//' -e 's/[^[:alnum:]_]*$//' \
    | grep -- "${WORD_RE}"
}

#######################################
# Collapse a list of words (stdin, one per line) into "count word" lines
# sorted by descending count (stdout).
#######################################
count_words() {
  sort | uniq -c | sort -nr
}

#######################################
# Print the word-frequency table for the current directory tree.
#######################################
word_frequency_here() {
  read_log_lines | extract_words | count_words
}

main() {
  word_frequency_here
}

main "$@"