JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
script to find word-frequency of tree of files
author Jason Woofenden <jason@jasonwoof.com>
Thu, 8 Nov 2012 07:34:59 +0000 (02:34 -0500)
committer Jason Woofenden <jason@jasonwoof.com>
Thu, 8 Nov 2012 07:34:59 +0000 (02:34 -0500)
word-frequency-here.bash [new file with mode: 0755]

diff --git a/word-frequency-here.bash b/word-frequency-here.bash
new file mode 100755 (executable)
index 0000000..ee16c58
--- /dev/null
@@ -0,0 +1,36 @@
+#!/bin/bash
+#
+# Print a word-frequency table for the text in every regular file under
+# the current directory, most frequent word first.
+#
+# Pipeline stages, in order:
+#   1. find, xargs, grep: read every regular file, skip binary ones, and
+#      drop lines containing the literal text notice in parentheses.
+#   2. sed: delete anything between angle brackets, turn each nbsp entity
+#      into a space and each ndash or mdash entity into a hyphen. All three
+#      substitutions carry the g flag so repeats on one line are handled.
+#   3. sed -n: keep only lines that start with a parenthesised field shaped
+#      like ..:..:.. .. followed by a name of 1 to 25 characters and a colon,
+#      and print just the text after the colon. This looks like a chat log
+#      layout - TODO confirm against the intended input.
+#   4. perl: slurp the whole stream and put every whitespace-separated
+#      token on its own line.
+#   5. grep -v: discard tokens that start with an ampersand and end with a
+#      semicolon, i.e. leftover entities.
+#   6. sed: trim non-word characters from both ends of each token.
+#   7. grep: keep tokens of 3 to 16 characters made only of lowercase
+#      letters, dots, apostrophes and hyphens.
+#   8. sort, uniq -c, sort -nr: count identical tokens and list the most
+#      frequent first.
+
+find . -type f -print0 \
+       | xargs -0 grep -I --no-filename -v '(notice)' \
+       | sed -e 's/<[^>]*>//g; s/&nbsp;/ /g; s/&[nm]dash;/-/g' \
+       | sed -ne "s/^(..:..:.. ..) [a-zA-Z0-9@ _.-]\{1,25\}: \(.*\)$/\1/p" \
+       | perl -0777 -p -e's/\s+/\n/mg' \
+       | grep -v '^&.*;$' \
+       | sed -e 's/^\W\+//g; s/\W\+$//g' \
+       | grep '^[a-z.'\''-]\{3,16\}$' \
+       | sort \
+       | uniq -c \
+       | sort -nr