#!/bin/bash
#
# word-frequency-here.bash -- word-frequency table for chat logs in a tree.
#
# Origin: commit 125e938ec9e8effca3345cc446fd4b80a825c63c
#         From: Jason Woofenden, Thu, 8 Nov 2012 02:34:59 -0500
#         Subject: script to find word-frequency of tree of files
#         (added as word-frequency-here.bash, mode 100755)
#
# Usage: word-frequency-here.bash [DIR...]
#   With no arguments the current directory is scanned.
#
# Only lines shaped like "(HH:MM:SS xx) nick: message" contribute; the
# message part is split into words, and words made of 3-16 characters from
# the set [a-z.'-] are counted.
#
# Output: one line per distinct word in `uniq -c` format (count, then the
#   word), highest count first.
#
# Needs GNU grep (-I, --no-filename) and GNU sed (\W and \+).
#
# `set -e` / `pipefail` are deliberately not enabled: grep exits 1 whenever
# it selects no lines, which here only means "empty table", not an error.

# Print every line of every non-binary regular file below the given roots,
# without file-name prefixes, skipping lines that contain "(notice)".
# `-exec ... {} +` batches files like xargs does but never starts grep with
# zero file operands.
read_tree_lines() {
  find "$@" -type f -exec grep -I --no-filename -v '(notice)' {} +
}

# Remove HTML tags and turn the entities that act as word separators into
# plain ASCII.
strip_markup() {
  # UTF-8 bytes of U+00A0 (non-breaking space), spelled with escapes so the
  # script itself stays pure ASCII.
  local nbsp=$'\xc2\xa0'

  # The entity is written "&[n]bsp;" (same regex as the plain spelling) so
  # that HTML-aware tooling cannot decode it into a literal space inside the
  # pattern.  Every substitution is global: a line may hold several
  # entities.
  sed -e 's/<[^>]*>//g' \
      -e 's/&[n]bsp;/ /g' \
      -e "s/${nbsp}/ /g" \
      -e 's/&[nm]dash;/-/g'
}

# Keep only the message text of lines shaped like
# "(HH:MM:SS xx) nick: message"; every other line is dropped.  The nick may
# be 1-25 characters from [a-zA-Z0-9@ _.-].
extract_messages() {
  sed -n -e 's/^(..:..:.. ..) [a-zA-Z0-9@ _.-]\{1,25\}: \(.*\)$/\1/p'
}

# Reduce the message stream to one candidate word per line.
split_words() {
  # tr streams, so the whole corpus is never held in memory at once.
  tr -s '[:space:]' '\n' \
    | grep -v '^&.*;$' \
    | sed -e 's/^\W\+//' -e 's/\W\+$//' \
    | grep "^[a-z.'-]\{3,16\}\$"
}

# Print the word-frequency table for the given directories (default: ".").
word_frequency() {
  local -a roots=("$@")
  (( ${#roots[@]} )) || roots=(.)

  read_tree_lines "${roots[@]}" \
    | strip_markup \
    | extract_messages \
    | split_words \
    | sort \
    | uniq -c \
    | sort -nr
}

word_frequency "$@"