/*
 * Copyright (c) 2003-2012
 * Distributed Systems Software.  All rights reserved.
 * See the file LICENSE for redistribution information.
 */

/*
 * String/Word completion web service
 * This general purpose string completion service can be a useful tool in
 * interactive interfaces where one item must be selected from a potentially
 * large number of items, such as choosing one name from a large phonebook
 * or choosing a filename or program name from those available in a given
 * context.  In these cases, presenting the user with a single-level or
 * multi-level list or menu is not workable.
 * This is intended to be useful with web pages that apply the XMLHttpRequest
 * function, or perhaps by scripts.
 *
 * Given a list of words (strings, in general), either:
 *   o find all completions for a specified word (string) (OP == "complete")
 *   o find the maximal extension of the word             (OP == "extend")
 *   o find all strings that match the word               (OP == "select")
 *   o find the item in the word list at a given index    (OP == "index")
 *
 * The word list can already be sorted, or a parameter can request sorting,
 * in which case qsort(3) is used.
 *
 * For example, say the word list is: bank, book, booking, books, booze
 * If the completion word is "booki", then the completion is "booking".
 * If the completion word is "book", then the completion is "book", "booking",
 * and "books".
 * If the completion word is "bo", then the extension is "book".
 *
 * Arguments:
 * WORD_LIST: the word list, with elements separated by an (encoded) newline
 *            character (%0a).
 * URL: the URL to be dereferenced to obtain the word list, with elements
 *      separated by an (encoded) newline character.
 * DICT: a predefined word list;
 *       "states"          - the list of US states
 *       "provinces"       - the list of Canadian provinces
 *       "statesprovinces" - the US states and Canadian provinces combined
 *       "/usr/share/dict/words" - a large list of words
 * One of WORD_LIST, URL, or DICT should be specified, but the default is
 * DICT="statesprovinces".  If more than one is specified, they are examined
 * in that order and the first non-null valued argument is used.
 *
 * WORD: the (optional) completion word; if absent, the entire word list is
 *       matched
 * SORT: if the elements of the word list are already sorted, argument SORT
 *       should be "no"; otherwise (including when SORT is omitted) the word
 *       list is sorted using the ASCII collating sequence, and unless
 *       argument ICASE is "yes", case-sensitive comparison is used.
 * OP:   "complete" (or unspecified) - the completion function is used,
 *       returning zero or more newline-separated words from DICT;
 *       "extend" -- the extension function is used (returning
 *       zero or one word);
 *       "select" -- WORD is treated as a regular expression that is
 *       matched against the word list;
 *       "index"  -- an argument named INDEX is expected that has an
 *       unsigned integer value that specifies the offset within the word list
 *       from which the return value is to be extracted;
 *       "count"  -- the number of words in the specified word list is returned.
 * ICASE: if "yes", matching is case insensitive.
 * LIMIT: an unsigned integer that specifies the maximum number of
 *       words to return; if this value is exceeded, an error message is
 *       returned.
 * OUTPUT: "text", "table", or "ttable"
 * DEBUG: if "yes", debugging output is emitted
 */

/*
 * Copyright and revision-id strings for this source file.
 * They are compiled only when the preprocessor symbol "lint" is not defined.
 */
#ifndef lint
static const char copyright[] =
"Copyright (c) 2003-2012\n\
Distributed Systems Software.  All rights reserved.";
static const char revid[] =
  "$Id: complete.c 2594 2012-10-19 17:28:49Z brachman $";
#endif

#include "dacs.h"
#include <math.h>

/*
 * Name identifying this program as a module.
 * NOTE(review): presumably consumed by the DACS logging facility declared
 * via dacs.h; it is not referenced elsewhere in this file -- confirm.
 */
static MAYBE_UNUSED char *log_module_name = "dacs_complete";

/* Tunable limits for this service. */
enum {
  /* Upper bound on the number of columns in a generated HTML table. */
  COMPLETION_MAX_COLS      = 12,
  /* Maximum number of words returned when no LIMIT argument is given. */
  COMPLETION_LIMIT_DEFAULT = 200
};

/* How results are rendered; selected by the OUTPUT argument. */
typedef enum {
  OUTPUT_TEXT = 0,		/* One word per line. */
  OUTPUT_TABLE,			/* HTML table, filled in list order. */
  OUTPUT_TTABLE			/* HTML table, filled down the columns. */
} Output_format;

/*
 * One entry of a built-in place-name table: the full name and its
 * two-letter abbreviation.
 */
typedef struct State_desc {
  char *state;		/* Full name, e.g., "Alabama" */
  char *abbrev;		/* Two-letter abbreviation, e.g., "AL" */
} State_desc;

/*
 * Built-in list of US states (plus the District of Columbia), listed
 * alphabetically by name.  The table is terminated by an entry whose
 * members are NULL.
 */
static State_desc state_desc[] = {
  { "Alabama", "AL" },
  { "Alaska", "AK" },
  { "Arizona", "AZ" },
  { "Arkansas", "AR" },
  { "California", "CA" },
  { "Colorado", "CO" },
  { "Connecticut", "CT" },
  { "Delaware", "DE" },
  { "District of Columbia", "DC" },
  { "Florida", "FL" },
  { "Georgia", "GA" },
  { "Hawaii", "HI" },
  { "Idaho", "ID" },
  { "Illinois", "IL" },
  { "Indiana", "IN" },
  { "Iowa", "IA" },
  { "Kansas", "KS" },
  { "Kentucky", "KY" },
  { "Louisiana", "LA" },
  { "Maine", "ME" },
  { "Maryland", "MD" },
  { "Massachusetts", "MA" },
  { "Michigan", "MI" },
  { "Minnesota", "MN" },
  { "Mississippi", "MS" },
  { "Missouri", "MO" },
  { "Montana", "MT" },
  { "Nebraska", "NE" },
  { "Nevada", "NV" },
  { "New Hampshire", "NH" },
  { "New Jersey", "NJ" },
  { "New Mexico", "NM" },
  { "New York", "NY" },
  { "North Carolina", "NC" },
  { "North Dakota", "ND" },
  { "Ohio", "OH" },
  { "Oklahoma", "OK" },
  { "Oregon", "OR" },
  { "Pennsylvania", "PA" },
  { "Rhode Island", "RI" },
  { "South Carolina", "SC" },
  { "South Dakota", "SD" },
  { "Tennessee", "TN" },
  { "Texas", "TX" },
  { "Utah", "UT" },
  { "Vermont", "VT" },
  { "Virginia", "VA" },
  { "Washington", "WA" },
  { "West Virginia", "WV" },
  { "Wisconsin", "WI" },
  { "Wyoming", "WY" },
  { NULL, NULL }
};

/*
 * Built-in list of Canadian provinces and territories.  The entries are not
 * in alphabetical order.  The table is terminated by an entry whose members
 * are NULL.
 *
 * The abbreviation for Newfoundland is "NL" (Newfoundland and Labrador);
 * the older symbol "NF" has been retired.  The displayed name is left
 * unchanged so that existing completions are not affected.
 */
static State_desc prov_desc[] = {
  { "British Columbia", "BC" }, { "Alberta", "AB" },
  { "Saskatchewan", "SK" },     { "Manitoba", "MB" },
  { "Ontario", "ON" },          { "Quebec", "QC" },
  { "New Brunswick", "NB" },    { "Nova Scotia", "NS" },
  { "Nunavut", "NU" },          { "Prince Edward Island", "PE" },
  { "Newfoundland", "NL" },     { "Northwest Territories", "NT" },
  { "Yukon", "YT" },            { NULL, NULL }
};

/*
 * Non-zero if debugging output should be emitted; static storage makes the
 * initial value zero.  Set by main() when the DEBUG argument is "yes".
 */
static int debug;

static void
emit(FILE *fp, Dsvec *dsv, Output_format output_format, int start, int limit)
{
  int i;
  char *p;

  if (start == (limit -1)) {
	p = (char *) dsvec_ptr_index(dsv, start);
	fprintf(fp, "%s\n", p);
	return;
  }

  if (output_format == OUTPUT_TABLE) {
	int ncells, ncols, nrows;
	Dsvec *attrs;
	Html_table *tab;

	ncells = limit - start;
	if (ncells < 10)
	  ncols = 1;
	else
	  ncols = (int) sqrt((double) ncells);
	if (ncols > COMPLETION_MAX_COLS)
	  ncols = COMPLETION_MAX_COLS;

	nrows = (ncells + ncols - 1) / ncols;
	if (debug) {
	  printf("ncells=%d\n", ncells);
	  printf("ncols=%d\n", ncols);
	  printf("nrows=%d\n", nrows);
	}

	tab = html_table(NULL, NULL);
	tab->row_class = "tr";
	tab->auto_row_nclasses = 2;
	html_table_begin(tab, NULL, ncols);
	html_row_begin(tab);
	for (i = start; i < limit; i++) {
	  p = (char *) dsvec_ptr_index(dsv, i);
	  html_cell(tab, p);
	}
	html_table_end(tab);
	fprintf(fp, "%s", ds_buf(tab->ds));
  }
  else if (output_format == OUTPUT_TTABLE) {
	int ncells, ncols, nrows, r;
	Dsvec *attrs;
	Html_table *tab;

	ncells = limit - start;
	if (ncells < 10) {
	  nrows = ncells;
	  ncols = 1;
	}
	else {
	  ncols = (int) sqrt((double) ncells);
	  if (ncols > COMPLETION_MAX_COLS)
		ncols = COMPLETION_MAX_COLS;
	  nrows = (ncells + ncols - 1) / ncols;
	}

	if (debug) {
	  printf("ncells=%d\n", ncells);
	  printf("ncols=%d\n", ncols);
	  printf("nrows=%d\n", nrows);
	}

	tab = html_table(NULL, NULL);
	tab->row_class = "tr";
	tab->auto_row_nclasses = 2;
	attrs = dsvec_init(NULL, sizeof(char *));
	dsvec_add_ptr(attrs, "width=\"100%\"");

	html_table_begin(tab, attrs, ncols);
	for (r = 0; r < nrows; r++) {
	  html_row_begin(tab);
	  for (i = start + r; i < limit; i += nrows) {
		p = (char *) dsvec_ptr_index(dsv, i);
		html_cellf(tab, "<a onclick=\"doSet(%d)\">%s</a>", i, p);
	  }
	  html_row_end(tab);
	}
	html_table_end(tab);
	fprintf(fp, "%s", ds_buf(tab->ds));
  }
  else {
	for (i = start; i < limit; i++) {
	  p = (char *) dsvec_ptr_index(dsv, i);
	  fprintf(fp, "%s\n", p);
	}
  }

}

int
main(int argc, char **argv)
{
  int do_complete, do_count, do_extend, do_index, do_select, do_sort;
  int c, emit_html, i, icase_flag, st;
  unsigned int completion_limit;
  char *dict, *icase_str, *word_list_str, *errmsg, *op_str, *p;
  char *sort_str, *request_url, *word_str;
  Completions completions;
  DACS_app_type app_type;
  Dsvec *words;
  Html_header_conf *hc;
  Kwv *kwv;
  Kwv_pair *v;
  Output_format output_format;

  debug = 0;
  emit_html = 0;
  hc = emit_html_header_conf(NULL);
  errmsg = NULL;

  app_type = DACS_STANDALONE_SERVICE;
  app_type = DACS_WEB_SERVICE;
  st = dacs_init(app_type, &argc, &argv, &kwv, &errmsg);

  emit_html ? emit_html_header(stdout, NULL) : emit_plain_header(stdout);
  if (st == -1) {
	if (errmsg != NULL)
	  printf("dacs_init() failed: %s\n", errmsg);
	else
	  printf("dacs_init() failed\n");
	emit_html ? emit_html_trailer(stdout) : emit_plain_trailer(stdout);

	exit(1);
  }

  if (emit_html) {
	if (conf_val(CONF_CSS_PATH) != NULL)
	  hc->css = ds_xprintf("%s/dacs_complete.css", conf_val(CONF_CSS_PATH));
	else
	  hc->css = CSS_DIR/**/"/dacs_complete.css";
	hc->title = "Output of dacs_complete";

	emit_html_header(stdout, hc);
  }
  else
	emit_plain_header(stdout);

  completion_limit = COMPLETION_LIMIT_DEFAULT;

  if ((p = kwv_lookup_value(kwv, "DEBUG")) != NULL && strcaseeq(p, "yes"))
	debug = 1;

  /*
   * There are several use cases, each of which as three phases:
   * 1) matching (comparing the input string against a string item),
   * 2) listing (the information to display as candidate completion items), and
   * 3) result (after a single item is selected, the final value).
   *
   * Use Case 1. Basic Strings
   * This is simply a list of strings where matching is against each string,
   * the strings are listed, and a string is the result.  Choosing a word from
   * a dictionary to submit, for instance.
   * Use Case 2. Structured Strings
   * Each string must be parsed into three parts: a string for matching,
   * a string for display, and a string for the result.
   * For instance: walnuts|A delicious nut|http://example.net/lookup
   * Completion is against the string "walnuts", listing might display
   * "walnuts: A delicious nut", and selection might result in the value
   * "http://example.net/lookup?WORD=walnuts" being submitted.
   */

  if ((word_list_str = kwv_lookup_value(kwv, "WORD_LIST")) != NULL
	  && *word_list_str != '\0') {
	char *w;

	/* User-specified word lists. */
	if (debug)
	  printf("WORD_LIST=\"%s\"\n", word_list_str);

	if ((w = url_decode(word_list_str, NULL, NULL)) == NULL) {
	  errmsg = "url_decode() of WORD_LIST failed";
	  goto fail;
	}

	if ((words = dsvec_lines(NULL, w)) == NULL) {
	  errmsg = "WORD_LIST line split failed";
	  goto fail;
	}
  }
  else if ((request_url = kwv_lookup_value(kwv, "URL")) != NULL
		   && *request_url != '\0') {
	int reply_len, status_code;
	char *reply;

	/* User-specified word list via a URL. */
	reply_len = -1;
	if (http_get(request_url, NULL, NULL, &reply, &reply_len, &status_code,
				 NULL) == -1) {
	  errmsg = ds_xprintf("HTTP request failed: %s", reply);
	  goto fail;
	}
	if (status_code != 200) {
	  errmsg = "HTTP returned invalid status";
	  goto fail;
	}
	if ((words = dsvec_lines(NULL, reply)) == NULL) {
	  errmsg = "HTTP line split failed";
	  goto fail;
	}
  }
  else if ((dict = kwv_lookup_value(kwv, "DICT")) != NULL
		   && *dict != '\0') {
	/* Built-in lists and dictionaries. */
	if (strcaseeq(dict, "states")) {
	  words = dsvec_init(NULL, sizeof(char *));
	  for (i = 0; state_desc[i].state != NULL; i++)
		dsvec_add_ptr(words, state_desc[i].state);
	}
	else if (strcaseeq(dict, "provinces")) {
	  words = dsvec_init(NULL, sizeof(char *));
	  for (i = 0; prov_desc[i].state != NULL; i++)
		dsvec_add_ptr(words, prov_desc[i].state);
	}
	else if (strcaseeq(dict, "statesprovinces")) {
	  words = dsvec_init_size(NULL, sizeof(char *), 70);
	  for (i = 0; state_desc[i].state != NULL; i++)
		dsvec_add_ptr(words, state_desc[i].state);
	  for (i = 0; prov_desc[i].state != NULL; i++)
		dsvec_add_ptr(words, prov_desc[i].state);
	}
	else if (strcaseeq(dict, "/usr/share/dict/words")) {
	  words = dsvec_init_size(NULL, sizeof(char *), 300000);
	  if (dsvec_load_lines(words, "/usr/share/dict/words") == NULL) {
		errmsg = "Cannot load /usr/share/dict/words";
		goto fail;
	  }
	}
	else {
	  errmsg = "Unrecognized DICT argument";
	  goto fail;
	}
  }
  else {
	/* If nothing is specified, use state/provinces for test purposes... */
	words = dsvec_init_size(NULL, sizeof(char *), 70);
	for (i = 0; state_desc[i].state != NULL; i++)
	  dsvec_add_ptr(words, state_desc[i].state);
	for (i = 0; prov_desc[i].state != NULL; i++)
	  dsvec_add_ptr(words, prov_desc[i].state);
  }
  if (debug)
	printf("%d lines\n", dsvec_len(words));

  if ((p = kwv_lookup_value(kwv, "LIMIT")) != NULL) {
	if (strnum(p, STRNUM_UINZ, &completion_limit) == -1) {
	  errmsg = "Invalid LIMIT argument";
	  goto fail;
	}
  }
  if (debug)
	printf("completion_limit=%u\n", completion_limit);

  do_sort = 1;
  if ((sort_str = kwv_lookup_value(kwv, "SORT")) != NULL) {
    if (strcaseeq(sort_str, "no"))
	  do_sort = 0;
  }
  if (debug && sort_str != NULL)
	printf("SORT=\"%s\"\n", sort_str);

  if ((icase_str = kwv_lookup_value(kwv, "ICASE")) != NULL) {
    if (strcaseeq(icase_str, "yes"))
	  icase_flag = 1;
  }
  if (debug && icase_str != NULL)
	printf("ICASE=\"%s\"\n", icase_str);

  if (do_sort) {
	if (icase_flag)
	  dsvec_sort(words, dsvec_compar_string_nocase);
	else
	  dsvec_sort(words, dsvec_compar_string);
  }

  if (debug) {
	printf("Dict is:\n");
	for (i = 0; i < dsvec_len(words); i++) {
	  p = (char *) dsvec_ptr_index(words, i);
	  printf("\"%s\"\n", p);
	}
  }

  do_complete = 1;
  do_count = 0;
  do_extend = 0;
  do_index = 0;
  do_select = 0;
  if ((op_str = kwv_lookup_value(kwv, "OP")) != NULL) {
    if (strcaseeq(op_str, "complete"))
      do_complete = 1;
    else if (strcaseeq(op_str, "extend")) {
      do_complete = 0;
      do_extend = 1;
    }
    else if (strcaseeq(op_str, "select")) {
      do_complete = 0;
      do_select = 1;
    }
    else if (strcaseeq(op_str, "index")) {
      do_complete = 0;
      do_index = 1;
    }
    else if (strcaseeq(op_str, "count")) {
      do_complete = 0;
      do_count = 1;
    }
    else
      goto fail;
	if (debug)
	  printf("OP=\"%s\"\n", op_str);
  }

  output_format = OUTPUT_TEXT;
  if ((p = kwv_lookup_value(kwv, "OUTPUT")) != NULL) {
	if (strcaseeq(p, "TEXT"))
	  output_format = OUTPUT_TEXT;
	else if (strcaseeq(p, "TABLE"))
	  output_format = OUTPUT_TABLE;
	else if (strcaseeq(p, "TTABLE"))
	  output_format = OUTPUT_TTABLE;
	else {
	  errmsg = "Invalid OUTPUT argument";
	  goto fail;
	}
  }

  if (debug) {
	printf("do_complete=%d\n", do_complete);
	printf("do_count=%d\n", do_count);
	printf("do_extend=%d\n", do_extend);
	printf("do_index=%d\n", do_index);
	printf("do_select=%d\n", do_select);
	printf("do_sort=%d\n", do_sort);
	printf("completion_limit=%u\n", completion_limit);
	printf("icase_flag=%d\n", icase_flag);
	printf("output_format=%d\n", output_format);
  }

  if (do_count) {
	printf("%u\n", dsvec_len(words));
	goto done;
  }

  if (do_index) {
	unsigned int index_val;
	char *index_str;

	if ((index_str = kwv_lookup_value(kwv, "INDEX")) == NULL) {
	  errmsg = "INDEX argument is required";
	  goto fail;
	}
	if (strnum(index_str, STRNUM_UI, &index_val) == -1) {
	  errmsg = "Invalid INDEX argument";
	  goto fail;
	}

	if (index_val >= dsvec_len(words)) {
	  errmsg = ds_xprintf("INDEX argument is too big (%u >= %u)",
						  index_val, dsvec_len(words));
	  goto fail;
	}

	p = (char *) dsvec_ptr_index(words, index_val);
	printf("%s\n", p);

	goto done;
  }

  if ((word_str = kwv_lookup_value(kwv, "WORD")) == NULL) {
	if (dsvec_len(words) > completion_limit) {
	  printf("\n(Too many completions: %u)\n", dsvec_len(words));
	}
	else
	  emit(stdout, words, output_format, 0, dsvec_len(words));
	goto done;
  }
  if (debug)
	printf("WORD=\"%s\"\n", word_str);

  if (do_select) {
	Dsvec *dsv_s;
	Select_arg sa;

	sa.regex = word_str;
	sa.regex_flags = REG_EXTENDED | REG_NOSUB;
	if (icase_flag)
	  sa.regex_flags |= REG_ICASE;
	sa.anchored = 0;
	sa.errbuf = (char *) malloc(100);
	sa.errbuf_size = 100;
	dsv_s = dsvec_select(words, word_str, dsvec_select_initx, &sa, NULL);
	if (dsvec_len(dsv_s) > completion_limit) {
	  printf("\n(Too many completions: %u)\n", dsvec_len(dsv_s));
	}
	else
	  emit(stdout, dsv_s, output_format, 0, dsvec_len(dsv_s));
  }
  else {
	c = dsvec_complete(words, word_str, NULL, &icase_flag, &completions);
	if (debug)
	  printf("c=%d\n", c);

	if (c > 0) {
	  if (do_extend) {
		if (debug)
		  printf("Extension is: ");
		p = (char *) dsvec_ptr_index(words, completions.start);
		printf("%s\n",
			   strndup(p, completions.prefix_cnt + completions.skip_cnt));
	  }
	  else {
		unsigned int ncompletions;

		ncompletions = completions.end - completions.start + 1;
		if (debug)
		  printf("Completion is:\n");
		if (ncompletions > completion_limit) {
		  printf("\n(Too many completions: %u)\n", ncompletions);
		}
		else
		  emit(stdout, words, output_format,
			   completions.start, completions.end + 1);
	  }
	}
  }

 done:

  emit_html ? emit_html_trailer(stdout) : emit_plain_trailer(stdout);

  exit(0);

 fail:
  printf("%s\n", errmsg);
  emit_html ? emit_html_trailer(stdout) : emit_plain_trailer(stdout);

  exit(1);
}
