From: Jason Woofenden
Date: Tue, 15 Jan 2013 11:54:50 +0000 (-0500)
Subject: calculate letter frequency from wordlist
X-Git-Url: https://jasonwoof.com/gitweb/?p=hexbog.git;a=commitdiff_plain;h=c0823d94d3ddcfd3006b931ebf2e9c1116553b1b

calculate letter frequency from wordlist
---

diff --git a/freq.coffee b/freq.coffee
new file mode 100644
index 0000000..bf4e497
--- /dev/null
+++ b/freq.coffee
@@ -0,0 +1,76 @@
+#!/usr/bin/coffee
+
+# This script tries to calculate an optimal letter distribution.
+#
+# It reads a newline-separated word list and prints one "<weight> # <letter>"
+# line per entry of the weights table (the format used for letter_distribution
+# in main.coffee), followed by a "total: <sum>" line. A "q" directly followed
+# by "u" is counted once, as "qu", instead of as two separate letters.
+
+fs = require 'fs'
+
+wordlist_file = 'wordlist.txt'
+
+fs.readFile wordlist_file, 'utf8', (err, data) ->
+	if err?
+		# report the file we actually tried to read
+		console.log "Error reading #{wordlist_file}: #{err}"
+		process.exit(1)
+	else
+		weights = {
+			a: 0
+			b: 0
+			c: 0
+			d: 0
+			e: 0
+			f: 0
+			g: 0
+			h: 0
+			i: 0
+			j: 0
+			k: 0
+			l: 0
+			m: 0
+			n: 0
+			o: 0
+			p: 0
+			q: 0
+			r: 0
+			s: 0
+			t: 0
+			u: 0
+			v: 0
+			w: 0
+			x: 0
+			y: 0
+			z: 0
+			qu: 0
+		}
+		words = data.split '\n'
+		for word in words
+			# 4-letter words count fully; every letter of length above or below
+			# that multiplies the word's contribution by another .6
+			weight = 1.0 * Math.pow .6, (Math.abs(word.length - 4))
+			i = 0
+			while i < word.length
+				if word[i] is 'q' and i + 1 < word.length and word[i+1] is 'u'
+					weights['qu'] += weight
+					i += 2
+				else
+					# only count characters that have an entry in the table, so
+					# that e.g. '\r', capitals or punctuation can't create NaN
+					# entries and poison the total
+					if weights[word[i]]?
+						weights[word[i]] += weight
+					i += 1
+
+		total = 0
+		for letter, weight of weights
+			total += Math.round(weight)
+
+		for letter, weight of weights
+			weight = Math.round(weight)
+			#console.log "#{letter}: #{Math.round(weight / total * 1000)} (1 in #{Math.round(total / weight)})"
+			console.log "#{weight} # #{letter}"
+
+		console.log "total: #{total}"
diff --git a/main.coffee b/main.coffee
index 99d2e43..5c1fab7 100644
--- a/main.coffee
+++ b/main.coffee
@@ -90,35 +90,36 @@ selected = []
 
 letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 letter_distribution = [
-	9 # A
-	2 # B
-	2 # C
-	4 # D
-	14 # E
-	2 # F
-	3 # G
-	2 # H
-	7 # I
-	1 # J
-	1 # K
-	4 # L
-	2 # M
-	6 # N
-	8 # O
-	2 # P
-	1 # Q
-	6 # R
-	7 # S
-	6 # T
-	2 # U
-	2 # V
-	2 # W
-	1 # X
-	2 # Y
-	1 # Z
+	14355 # a
+	3968 # b
+	6325 # c
+	7045 # d
+	20258 # e
+	2739 # f
+	5047 # g
+	4372 # h
+	13053 # i
+	516 # j
+	2600 # k
+	9631 # l
+	5115 # m
+	10082 # n
+	11142 # o
+	5292 # p
+	287 # qu
+	12341 # r
+	16571 # s
+	10215 # t
+	6131 # u
+	1728 # v
+	2184 # w
+	619 # x
+	3512 # y
+	831 # z
+
 ]
 
-letter_distribution_total = letter_distribution.sum()
+letter_distribution_total = 175959 # letter_distribution.sum()
 
 new_letter_queue = []
 