Wednesday, March 06, 2013

 

Quick CGI SpellChecker


Here's a small CGI program that provides spellchecking services based on GNU Aspell. I use it on a local network where Delphi Win32 clients connect to this CGI, hosted on an Apache server running on Linux, to ask for spelling suggestions.

This simple spellchecker has a small API that covers just the basics: checking the spelling of a word, adding a word to the dictionary, and deleting a word from the dictionary. The client side can be written in any language capable of making HTTP GET requests and handling JSON responses.

Prerequisites:


Of course, the first requisite is to install Aspell and one or more dictionaries. On apt-get based systems, you'll install them using this:

sudo apt-get install aspell
sudo apt-get install aspell-en

The next step is to create a personal dictionary. This is just a plain text file where new words will be added. Initially the file must have just one line, containing this:

personal_ws-1.1 en 0 

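If the dictionary will hold, for example, Spanish words, you must replace "personal_ws-1.1 en 0" with "personal_ws-1.1 es 0", and do:

sudo apt-get install aspell-es  (Debian/Ubuntu package names are lowercase and contain no underscore; a regional variant such as es_AR for Argentina is requested later through aspell's --lang option).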

IMPORTANT: make the file readable and writable by everyone (for example with chmod 666), so that the CGI process started by the web server can both read and update it.

How it works, the API:


All requests must be done using these commands:

/cgi-bin/cgiaspell/TSpellCheck/WordSpell?word=
/cgi-bin/cgiaspell/TSpellCheck/WordAdd?word=
/cgi-bin/cgiaspell/TSpellCheck/WordDelete?word=

The value given after "word=" is the word to be spell-checked, added to, or deleted from the dictionary.

Spell checking


For example, if you want to check the spelling of the word "houuse", you'll have to request this URL:
http://myserver/cgi-bin/cgiaspell/TSpellCheck/WordSpell?word=houuse
The result is this JSON string:
{ "replacements" : ["House", "house", "hose", "horse", "hours", "hoarse", 
"hoes", "hues", "Hosea", "housed", "houses", "Hus", "hos", "horsey", "hour's", 
"Ho's", "hows", "huhs", "Horus", "hoarser", "douse", "louse", "mouse", "rouse", 
"souse", "Hausa", "Hesse", "hoe's", "hoers", "how's", "hussy", "Hui's", 
"House's", "house's", "hue's", "hoar's", "hoer's", "Horus's"], "total" : 38 }

Adding a word to the personal dictionary


http://myserver/cgi-bin/cgiaspell/TSpellCheck/WordAdd?word=houuse

This will return "Ok." if the word was added correctly.

Deleting a word from the personal dictionary


http://myserver/cgi-bin/cgiaspell/TSpellCheck/WordDelete?word=houuse

This will return "Ok." if the word was removed correctly, or, a message saying it wasn't deleted.

The program


In Lazarus, just create a CGI Application (you'll need the WebLaz package), save the project as "cgiaspell.lpi", and rename unit1 to main.

Now, adapt your main.lfm to this :

object SpellCheck: TSpellCheck
  OnCreate = DataModuleCreate
  OldCreateOrder = False
  Actions = <  
    item
      Name = 'WordSpell'
      Default = False
      OnRequest = WordSpellRequest
      Template.AllowTagParams = False
    end  
    item
      Name = 'WordAdd'
      Default = False
      OnRequest = WordAddRequest
      Template.AllowTagParams = False
    end  
    item
      Name = 'WordDelete'
      Default = False
      OnRequest = WordDeleteRequest
      Template.AllowTagParams = False
    end>
  CreateSession = False
  Height = 150
  HorizontalOffset = 250
  VerticalOffset = 250
  Width = 150
end

Then make main.pas look like this:

unit main;

{$mode objfpc}{$H+}

interface

uses
  SysUtils, Classes, httpdefs, fpHTTP, fpWeb,
  process,
  fpjson;

type

  { TSpellCheck }

  // Web module that exposes the three spell-checking actions declared in
  // main.lfm (WordSpell, WordAdd, WordDelete) as HTTP request handlers.
  TSpellCheck = class(TFPWebModule)
    // OnCreate handler referenced from main.lfm.
    procedure DataModuleCreate(Sender: TObject);
    // Handler for the 'WordAdd' action: adds the "word" query parameter
    // to the personal dictionary file.
    procedure WordAddRequest(Sender: TObject; ARequest: TRequest;
      AResponse: TResponse; var Handled: Boolean);
    // Handler for the 'WordDelete' action: removes the "word" query
    // parameter from the personal dictionary file.
    procedure WordDeleteRequest(Sender: TObject; ARequest: TRequest;
      AResponse: TResponse; var Handled: Boolean);
    // Handler for the 'WordSpell' action: answers with the JSON produced
    // by SpellWord for the "word" query parameter.
    procedure WordSpellRequest(Sender: TObject; ARequest: TRequest;
      AResponse: TResponse; var Handled: Boolean);
  private
    // Converts the raw text printed by aspell into a JSON object with the
    // members "replacements" (array of strings) and "total" (count).
    function ASpellToJSON(AAspellResult: string): string;
    // Runs aspell on AWord and returns its answer converted by ASpellToJSON.
    function SpellWord(AWord: string): string;
  public
    { public declarations }
  end;

var
  SpellCheck: TSpellCheck;

const
  // Full path of the personal word list. The add/delete handlers load and
  // save this file directly, and SpellWord hands it to aspell through "-p".
  // NOTE(review): the path is hard-coded to one user's home directory and to
  // the es_AR variant - adjust it for the machine the CGI is deployed on.
  cDictionary = '/home/leonardo/.aspell.es_AR.pws';

implementation

{$R *.lfm}

{ TSpellCheck }

{ OnCreate handler referenced from main.lfm. It is intentionally empty: the
  module currently performs no work when it is created. }
procedure TSpellCheck.DataModuleCreate(Sender: TObject);
begin

end;

{ Handles the 'WordAdd' action: appends the "word" query parameter to the
  personal dictionary file (cDictionary) unless it is already listed.

  Response content:
    'Ok.'                           - the word was appended and the file saved
    '<word> already in dictionary.' - the word was found, file left untouched

  Raises Exception when the "word" parameter is missing, empty after trimming,
  or contains a line break. The dictionary file stores exactly one word per
  line, so a value with an embedded CR/LF would silently inject extra entries
  (or damage the file) and must be refused. }
procedure TSpellCheck.WordAddRequest(Sender: TObject; ARequest: TRequest;
  AResponse: TResponse; var Handled: Boolean);
var
  lStr: TStringList;
  lWord: string;
begin
  if ARequest.QueryFields.IndexOfName('word') = -1 then
    raise Exception.Create('word param is not present');

  // The value comes straight from the URL, so normalise and validate it
  // before it gets anywhere near the dictionary file.
  lWord := Trim(ARequest.QueryFields.Values['word']);
  if (lWord = '') or (Pos(#10, lWord) > 0) or (Pos(#13, lWord) > 0) then
    raise Exception.Create('word param is empty or invalid');

  // todo: this should be replaced by something
  // more reliable. I.e.: what happens if cDictionary is blocked
  // by another process, or LoadFromFile can be slow on big dictionaries.
  lStr := TStringList.Create;
  try
    lStr.LoadFromFile(cDictionary);
    if lStr.IndexOf(LowerCase(lWord)) = -1 then
    begin
      lStr.Add(lWord);
      lStr.SaveToFile(cDictionary);
      AResponse.Content := 'Ok.';
    end
    else
      AResponse.Content := lWord + ' already in dictionary.';
  finally
    lStr.Free;
  end;
  Handled := True;
end;

{ Handles the 'WordDelete' action: removes the "word" query parameter from
  the personal dictionary file (cDictionary).

  Response content:
    'Ok.'                       - the word was found, removed and the file saved
    '<word> not in dictionary.' - nothing was removed

  Raises Exception when the "word" parameter is missing.

  The first line of the file is the "personal_ws-1.1 <lang> 0" header, not a
  word. A request whose word happens to equal that header must never delete
  it, so only matches below the first line are accepted. }
procedure TSpellCheck.WordDeleteRequest(Sender: TObject; ARequest: TRequest;
  AResponse: TResponse; var Handled: Boolean);
var
  lStr: TStringList;
  lIdx: Integer;
  lWord: string;
begin
  if ARequest.QueryFields.IndexOfName('word') = -1 then
    raise Exception.Create('word param is not present');
  lWord := ARequest.QueryFields.Values['word'];

  // todo: this should be replaced by something
  // more reliable. I.e.: what happens if cDictionary is blocked
  // by another process, or LoadFromFile can be slow on big dictionaries.
  lStr := TStringList.Create;
  try
    lStr.LoadFromFile(cDictionary);
    lIdx := lStr.IndexOf(LowerCase(lWord));
    // Index 0 is the file header; treat a hit there as "not found".
    if lIdx > 0 then
    begin
      lStr.Delete(lIdx);
      lStr.SaveToFile(cDictionary);
      AResponse.Content := 'Ok.';
    end
    else
      AResponse.Content := lWord + ' not in dictionary.';
  finally
    lStr.Free;
  end;
  Handled := True;
end;

{ Handles the 'WordSpell' action: looks up the "word" query parameter and
  answers with whatever SpellWord returns for it (a JSON string).
  Raises Exception when the "word" parameter is absent. }
procedure TSpellCheck.WordSpellRequest(Sender: TObject; ARequest: TRequest;
  AResponse: TResponse; var Handled: Boolean);
var
  lQuery: TStrings;
begin
  lQuery := ARequest.QueryFields;

  // Guard clause: nothing to check without the parameter.
  if lQuery.IndexOfName('word') < 0 then
    raise Exception.Create('word param is not present');

  AResponse.Content := SpellWord(lQuery.Values['word']);
  Handled := True;
end;

{ Converts the text printed by "aspell -a" into a JSON object of the form
    { "replacements" : [ "<suggestion>", ... ], "total" : <count> }

  AAspellResult is the complete output of the aspell run. Only the first
  line that starts with '&' (a misspelled word that has suggestions) is used;
  the suggestions are the comma-separated items after the ':' on that line.
  Any other output (version banner, '*' for a correct word, '#' for a word
  with no suggestions, error text) produces an empty list with total 0.

  Differences from a plain CommaText parse, all deliberate:
    - suggestions are split on ',' only, so a suggestion that contains a
      blank stays a single entry;
    - text after the end of the '&' line is ignored, so a colon in unrelated
      output (e.g. an error message) is not mistaken for a suggestion list. }
function TSpellCheck.ASpellToJSON(AAspellResult: string): string;
var
  lLines: TStringList;
  lStr: TStringList;
  lJSon: TJSONObject;
  lJsonArray: TJSONArray;
  lSuggestions: string;
  lColon: Integer;
  I: Integer;
begin
  Result := '';
  lSuggestions := '';
  lLines := TStringList.Create;
  lStr := TStringList.Create;
  lJSon := TJSONObject.Create;
  try
    // Locate the first suggestion line and keep what follows its colon.
    lLines.Text := AAspellResult;
    for I := 0 to lLines.Count - 1 do
      if (Length(lLines[I]) > 0) and (lLines[I][1] = '&') then
      begin
        lColon := Pos(':', lLines[I]);
        if lColon > 0 then
          lSuggestions := Copy(lLines[I], lColon + 1, Length(lLines[I]));
        Break;
      end;

    // Split on commas only; surrounding blanks are removed with Trim below.
    lStr.StrictDelimiter := True;
    lStr.Delimiter := ',';
    lStr.DelimitedText := lSuggestions;

    // Attach the array to the object first: from here on lJSon owns it, so
    // it is released by lJSon.Free even if filling it raises.
    lJsonArray := TJSONArray.Create;
    lJSon.Add('replacements', lJsonArray);
    for I := 0 to lStr.Count - 1 do
      lJsonArray.Add(Trim(lStr[I]));
    lJSon.Add('total', lJsonArray.Count);
    Result := lJSon.AsJSON;
  finally
    lJSon.Free;
    lStr.Free;
    lLines.Free;
  end;
end;

{ Runs "/usr/bin/aspell -a --lang=es_AR -p <cDictionary>", feeds it AWord on
  standard input and returns aspell's answer converted by ASpellToJSON.

  AWord comes from the request URL and is therefore untrusted:
    - only its first line is sent, so one request checks one line;
    - the line is prefixed with '^', which in aspell's pipe mode means
      "spell-check the rest of this line". Without it, a line starting with
      '*', '&', '@', '#', '!' ... would be executed as a pipe-mode command
      (for instance adding words to, or saving, the personal dictionary).

  Output is read until aspell closes its end of the pipe, instead of waiting
  for the process to exit first. That avoids a deadlock when the output does
  not fit in the pipe buffer, and it no longer truncates long answers.

  Raises Exception('Exit status: <n>') when aspell produced no output.
  The process object is always released, also when an exception is raised. }
function TSpellCheck.SpellWord(AWord: string): string;
var
  lProcess: TProcess;
  lLine: string;
  lOutput: string;
  lChunk: string;
  lCut: Integer;
  Buffer: array[0..2047] of Char;
  ReadCount: Integer;
begin
  // Keep only the first line of the input.
  lLine := AWord;
  lCut := Pos(#10, lLine);
  if lCut > 0 then
    lLine := Copy(lLine, 1, lCut - 1);
  lCut := Pos(#13, lLine);
  if lCut > 0 then
    lLine := Copy(lLine, 1, lCut - 1);
  lLine := '^' + lLine + #10;

  lProcess := TProcess.Create(nil);
  try
    lProcess.Options := [poUsePipes, poStderrToOutPut];
    // Executable + Parameters replaces the deprecated CommandLine property
    // and keeps each argument intact even if the path contains blanks.
    lProcess.Executable := '/usr/bin/aspell';
    lProcess.Parameters.Add('-a');
    lProcess.Parameters.Add('--lang=es_AR');
    lProcess.Parameters.Add('-p');
    lProcess.Parameters.Add(cDictionary);
    lProcess.Execute;

    lProcess.Input.Write(lLine[1], Length(lLine));
    lProcess.CloseInput;

    // Drain the pipe until end-of-file (Read returns 0 once aspell exits).
    lOutput := '';
    repeat
      ReadCount := lProcess.Output.Read(Buffer, SizeOf(Buffer));
      if ReadCount > 0 then
      begin
        SetString(lChunk, PAnsiChar(@Buffer[0]), ReadCount);
        lOutput := lOutput + lChunk;
      end;
    until ReadCount <= 0;

    // Make sure the exit status is available before it may be reported.
    lProcess.WaitOnExit;

    if lOutput = '' then
      raise Exception.Create(Format('Exit status: %d', [lProcess.ExitStatus]));

    Result := ASpellToJSON(lOutput);
  finally
    lProcess.Free;
  end;
end;

initialization
  // Register the module class under the name 'TSpellCheck'.
  // NOTE(review): this name appears to be the path segment used in the
  // request URLs (/cgi-bin/cgiaspell/TSpellCheck/<action>) - confirm before
  // renaming it.
  RegisterHTTPModule('TSpellCheck', TSpellCheck);
end.

Compile, copy the resulting binary to your Apache CGI directory, and enjoy!

This page is powered by Blogger. Isn't yours?