Amit Langote, reviewed by Beena Emerson, and further edited by me.
#include "utils/builtins.h"
/* GUC variable */
-extern bool bigm_enable_recheck;
-extern int bigm_gin_key_limit;
+extern bool bigm_enable_recheck;
+extern int bigm_gin_key_limit;
+extern double bigm_similarity_limit;
/* options */
#define LPADDING 1
/* operator strategy numbers */
#define LikeStrategyNumber 1
+#define SimilarityStrategyNumber 2 /* strategy number the GIN operator class assigns to the =% operator */
typedef struct
{
*recheck = true;
break;
}
+ case SimilarityStrategyNumber:
+ {
+ bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
+ bgmlen = ARRNELEM(bgm);
+ break;
+ }
default:
elog(ERROR, "unrecognized strategy number: %d", strategy);
bgm = NULL; /* keep compiler quiet */
bool *recheck = (bool *) PG_GETARG_POINTER(5);
bool res;
int32 i;
+ int32 ntrue;
Assert(nkeys > 0);
}
}
break;
+ case SimilarityStrategyNumber:
+ /* Count the matches */
+ ntrue = 0;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i])
+ ntrue++;
+ }
+#ifdef DIVUNION
+ res = (nkeys == ntrue) ? true :
+ ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >=
+ (float4) bigm_similarity_limit) ? true : false);
+#else
+ res = (nkeys == 0) ? false :
+ ((((((float4) ntrue) / ((float4) nkeys))) >=
+ (float4) bigm_similarity_limit) ? true : false);
+#endif
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d", strategy);
res = false; /* keep compiler quiet */
/* GUC variable */
bool bigm_enable_recheck = false;
int bigm_gin_key_limit = 0;
+double bigm_similarity_limit = 0.3; /* backs the pg_bigm.similarity_limit setting; threshold tested by bigm_similarity_op() */
char *bigm_last_update = NULL;
PG_FUNCTION_INFO_V1(show_bigm);
PG_FUNCTION_INFO_V1(bigm_similarity);
Datum bigm_similarity(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(bigm_similarity_op);
+Datum bigm_similarity_op(PG_FUNCTION_ARGS);
+
void _PG_init(void);
void _PG_fini(void);
NULL,
NULL);
+ DefineCustomRealVariable("pg_bigm.similarity_limit", /* setting name as seen by users */
+ "Sets the similarity threshold used by the "
+ "=% operator.",
+ NULL, /* no long description */
+ &bigm_similarity_limit, /* C variable that holds the current value */
+ 0.3, /* boot value; same as the variable's static initializer */
+ 0.0, 1.0, /* minimum and maximum accepted values */
+ PGC_USERSET, /* GUC context */
+ 0, /* no flags */
+ NULL, /* no check hook */
+ NULL, /* no assign hook */
+ NULL); /* no show hook */
+
/* Can't be set in postgresql.conf */
DefineCustomStringVariable("pg_bigm.last_update",
"Shows the last update date of pg_bigm.",
}
Datum
+bigm_similarity_op(PG_FUNCTION_ARGS)
+{
+    /*
+     * Support function for the =% operator: score the two arguments with
+     * bigm_similarity() and report whether that score reaches the
+     * bigm_similarity_limit threshold.
+     */
+    Datum       score_datum;
+    float4      score;
+    float4      threshold;
+
+    /* Both arguments are handed through untouched as Datums. */
+    score_datum = DirectFunctionCall2(bigm_similarity,
+                                      PG_GETARG_DATUM(0),
+                                      PG_GETARG_DATUM(1));
+    score = DatumGetFloat4(score_datum);
+
+    /* The limit is stored as a double; compare in float4 like the score. */
+    threshold = (float4) bigm_similarity_limit;
+
+    PG_RETURN_BOOL(score >= threshold);
+}
+
+Datum
likequery(PG_FUNCTION_ARGS)
{
text *query = PG_GETARG_TEXT_PP(0);
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT IMMUTABLE;
+CREATE FUNCTION bigm_similarity_op(text,text)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE; -- STABLE rather than IMMUTABLE because the result depends on the pg_bigm.similarity_limit setting
+
+CREATE OPERATOR =% ( -- similarity test on two text values, evaluated by bigm_similarity_op
+ LEFTARG = text,
+ RIGHTARG = text,
+ PROCEDURE = bigm_similarity_op,
+ COMMUTATOR = '=%', -- the operator is declared as its own commutator
+ RESTRICT = contsel, -- selectivity estimator used for restriction (WHERE) clauses
+ JOIN = contjoinsel -- selectivity estimator used for join clauses
+);
+
-- support functions for gin
CREATE FUNCTION gin_extract_value_bigm(text, internal)
RETURNS internal
FOR TYPE text USING gin
AS
OPERATOR 1 pg_catalog.~~ (text, text),
+ OPERATOR 2 =% (text, text),
FUNCTION 1 bigmtextcmp (text, text),
FUNCTION 2 gin_extract_value_bigm (text, internal),
FUNCTION 3 gin_extract_query_bigm (text, internal, int2, internal, internal, internal, internal),
# pg_bigm extension
-comment = 'text index searching based on bigrams'
+comment = 'text similarity measurement and index searching based on bigrams'
default_version = '1.1'
module_pathname = '$libdir/pg_bigm'
relocatable = true