<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Algorithm on Peczenyj's Blog</title><link>http://pacman.blog.br/categories/algorithm/</link><description>Recent content in Algorithm on Peczenyj's Blog</description><generator>Hugo</generator><language>en-us</language><lastBuildDate>Sat, 29 Dec 2012 03:17:00 +0000</lastBuildDate><atom:link href="http://pacman.blog.br/categories/algorithm/atom.xml" rel="self" type="application/rss+xml"/><item><title>Spell Correct in GNU AWK</title><link>http://pacman.blog.br/blog/2012/12/29/spell-correct-in-gawk/</link><pubDate>Sat, 29 Dec 2012 03:17:00 +0000</pubDate><guid>http://pacman.blog.br/blog/2012/12/29/spell-correct-in-gawk/</guid><description>&lt;p>Based on &lt;a href="http://norvig.com/spell-correct.html">Peter Norvig&amp;rsquo;s Spell Corrector&lt;/a>&lt;/p>
&lt;div class="highlight">&lt;pre tabindex="0" style="color:#93a1a1;background-color:#002b36;-moz-tab-size:4;-o-tab-size:4;tab-size:4;">&lt;code class="language-awk" data-lang="awk">&lt;span style="display:flex;">&lt;span>&lt;span style="color:#586e75"># Usage: gawk -v word=some_word_to_verify -f spelling.awk [ big.txt [ big2.txt ... ]]&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#586e75"># Gawk version with 15 lines -- 04/13/2008&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#586e75"># Author: tiago (dot) peczenyj (at) gmail (dot) com&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#586e75"># about.me/peczenyj&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#586e75"># Based on : http://norvig.com/spell-correct.html&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#268bd2">function&lt;/span> edits(w,max,candidates,list, i,j){
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#719e07">for&lt;/span>(i&lt;span style="color:#719e07">=&lt;/span>&lt;span style="color:#2aa198">0&lt;/span>;i&lt;span style="color:#719e07">&amp;lt;&lt;/span> max ;&lt;span style="color:#719e07">++&lt;/span>i) &lt;span style="color:#719e07">++&lt;/span>list[&lt;span style="color:#268bd2">substr&lt;/span>(w,&lt;span style="color:#2aa198">0&lt;/span>,i) &lt;span style="color:#268bd2">substr&lt;/span>(w,i&lt;span style="color:#719e07">+&lt;/span>&lt;span style="color:#2aa198">2&lt;/span>)] &lt;span style="color:#586e75"># deletes&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#719e07">for&lt;/span>(i&lt;span style="color:#719e07">=&lt;/span>&lt;span style="color:#2aa198">0&lt;/span>;i&lt;span style="color:#719e07">&amp;lt;&lt;/span> max&lt;span style="color:#719e07">-&lt;/span>&lt;span style="color:#2aa198">1&lt;/span>;&lt;span style="color:#719e07">++&lt;/span>i) &lt;span style="color:#719e07">++&lt;/span>list[&lt;span style="color:#268bd2">substr&lt;/span>(w,&lt;span style="color:#2aa198">0&lt;/span>,i) &lt;span style="color:#268bd2">substr&lt;/span>(w,i&lt;span style="color:#719e07">+&lt;/span>&lt;span style="color:#2aa198">2&lt;/span>,&lt;span style="color:#2aa198">1&lt;/span>) &lt;span style="color:#268bd2">substr&lt;/span>(w,i&lt;span style="color:#719e07">+&lt;/span>&lt;span style="color:#2aa198">1&lt;/span>,&lt;span style="color:#2aa198">1&lt;/span>) &lt;span style="color:#268bd2">substr&lt;/span>(w,i&lt;span style="color:#719e07">+&lt;/span>&lt;span style="color:#2aa198">3&lt;/span>)] &lt;span style="color:#586e75"># transposes&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#719e07">for&lt;/span>(i&lt;span style="color:#719e07">=&lt;/span>&lt;span style="color:#2aa198">0&lt;/span>;i&lt;span style="color:#719e07">&amp;lt;&lt;/span> max ;&lt;span style="color:#719e07">++&lt;/span>i) &lt;span style="color:#719e07">for&lt;/span>(j &lt;span style="color:#719e07">in&lt;/span> alpha) &lt;span style="color:#719e07">++&lt;/span>list[&lt;span style="color:#268bd2">substr&lt;/span>(w,&lt;span style="color:#2aa198">0&lt;/span>,i) alpha[j] &lt;span style="color:#268bd2">substr&lt;/span>(w,i&lt;span style="color:#719e07">+&lt;/span>&lt;span style="color:#2aa198">2&lt;/span>)] &lt;span style="color:#586e75"># replaces&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#719e07">for&lt;/span>(i&lt;span style="color:#719e07">=&lt;/span>&lt;span style="color:#2aa198">0&lt;/span>;i&lt;span style="color:#719e07">&amp;lt;=&lt;/span> max ;&lt;span style="color:#719e07">++&lt;/span>i) &lt;span style="color:#719e07">for&lt;/span>(j &lt;span style="color:#719e07">in&lt;/span> alpha) &lt;span style="color:#719e07">++&lt;/span>list[&lt;span style="color:#268bd2">substr&lt;/span>(w,&lt;span style="color:#2aa198">0&lt;/span>,i) alpha[j] &lt;span style="color:#268bd2">substr&lt;/span>(w,i&lt;span style="color:#719e07">+&lt;/span>&lt;span style="color:#2aa198">1&lt;/span>)] &lt;span style="color:#586e75"># inserts&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#719e07">for&lt;/span>(i &lt;span style="color:#719e07">in&lt;/span> list) &lt;span style="color:#719e07">if&lt;/span>(i &lt;span style="color:#719e07">in&lt;/span> NWORDS) candidates[i] &lt;span style="color:#719e07">=&lt;/span> NWORDS[i] } 
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#268bd2">function&lt;/span> correct(word ,candidates,i,list,max,temp){
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> edits(word,&lt;span style="color:#268bd2">length&lt;/span>(word),candidates,list)
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#719e07">if&lt;/span> (&lt;span style="color:#719e07">!&lt;/span>&lt;span style="color:#268bd2">asort&lt;/span>(candidates,temp)) &lt;span style="color:#719e07">for&lt;/span>(i &lt;span style="color:#719e07">in&lt;/span> list) edits(i,&lt;span style="color:#268bd2">length&lt;/span>(i),candidates)
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#719e07">return&lt;/span> (max &lt;span style="color:#719e07">=&lt;/span> &lt;span style="color:#268bd2">asorti&lt;/span>(candidates)) ? candidates[max] : word }
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#b58900">BEGIN&lt;/span>{ &lt;span style="color:#719e07">if&lt;/span> (&lt;span style="color:#b58900">ARGC&lt;/span> &lt;span style="color:#719e07">==&lt;/span> &lt;span style="color:#2aa198">1&lt;/span>) &lt;span style="color:#b58900">ARGV&lt;/span>[&lt;span style="color:#b58900">ARGC&lt;/span>&lt;span style="color:#719e07">++&lt;/span>] &lt;span style="color:#719e07">=&lt;/span> &lt;span style="color:#2aa198">&amp;#34;big.txt&amp;#34;&lt;/span> &lt;span style="color:#586e75"># http://norvig.com/big.txt&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#719e07">while&lt;/span>(&lt;span style="color:#719e07">++&lt;/span>i&lt;span style="color:#719e07">&amp;lt;=&lt;/span>&lt;span style="color:#268bd2">length&lt;/span>(x&lt;span style="color:#719e07">=&lt;/span>&lt;span style="color:#2aa198">&amp;#34;abcdefghijklmnopqrstuvwxyz&amp;#34;&lt;/span>)) alpha[i]&lt;span style="color:#719e07">=&lt;/span>&lt;span style="color:#268bd2">substr&lt;/span>(x,i,&lt;span style="color:#2aa198">1&lt;/span>)
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#b58900">IGNORECASE&lt;/span>&lt;span style="color:#719e07">=&lt;/span>&lt;span style="color:#b58900">RS&lt;/span>&lt;span style="color:#719e07">=&lt;/span>&lt;span style="color:#2aa198">&amp;#34;[^&amp;#34;&lt;/span>x&lt;span style="color:#2aa198">&amp;#34;]+&amp;#34;&lt;/span> }
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>{ &lt;span style="color:#719e07">++&lt;/span>NWORDS[&lt;span style="color:#268bd2">tolower&lt;/span>(&lt;span style="color:#719e07">$&lt;/span>&lt;span style="color:#2aa198">1&lt;/span>)] }
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#b58900">END&lt;/span>{ &lt;span style="color:#268bd2">print&lt;/span> (word &lt;span style="color:#719e07">in&lt;/span> NWORDS) ? word : &lt;span style="color:#2aa198">&amp;#34;correct(&amp;#34;&lt;/span>word&lt;span style="color:#2aa198">&amp;#34;)=&amp;gt; &amp;#34;&lt;/span> correct(&lt;span style="color:#268bd2">tolower&lt;/span>(word)) }
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/div>&lt;p>This is my version of Peter Norvig&amp;rsquo;s Spell Corrector in GNU AWK.&lt;/p></description></item></channel></rss>