Files
bsdports/data/postgresql96/files/patch-ICU-pg-96b4-icu-2016-08-10.diff
2019-12-26 07:26:06 +00:00

911 lines
24 KiB
Diff

diff --git a/.gitignore b/.gitignore
index cbf8d79..8218549 100644
--- .gitignore
+++ .gitignore
@@ -38,3 +38,5 @@ lib*.pc
/Debug/
/Release/
/tmp_install/
+
+/configure
diff --git a/configure.in b/configure.in
index 598fbd8..b83545c 100644
--- configure.in
+++ configure.in
@@ -730,6 +730,16 @@ AC_SUBST(with_systemd)
AC_MSG_RESULT([$with_systemd])
#
+# ICU
+#
+AC_MSG_CHECKING([whether to build with ICU support])
+PGAC_ARG_BOOL(with, icu, no, [ --with-icu build with ICU support],
+ [AC_DEFINE([USE_ICU], 1, [Define to build with ICU support. (--with-icu)])])
+AC_MSG_RESULT([$with_icu])
+AC_SUBST(with_icu)
+
+
+#
# Readline
#
PGAC_ARG_BOOL(with, readline, yes,
@@ -1120,6 +1130,63 @@ if test "$with_openssl" = yes ; then
AC_CHECK_FUNCS([SSL_get_current_compression])
fi
+if test "$with_icu" = yes ; then
+ AC_CHECK_LIB(icui18n, ucol_open_57, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_56, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_55, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_54, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_53, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_52, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_50, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_48, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_46, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_44, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_43, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_3_8, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_3_6, [], [
+ AC_CHECK_LIB(icui18n, ucol_open, [], [AC_MSG_ERROR([library 'icui18n' is required for ICU])])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_57, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_56, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_55, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_54, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_53, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_52, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_50, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_48, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_46, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_44, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_43, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_3_8, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_3_6, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars, [], [AC_MSG_ERROR([library 'icuuc' is required for ICU])])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+fi
+
if test "$with_pam" = yes ; then
AC_CHECK_LIB(pam, pam_start, [], [AC_MSG_ERROR([library 'pam' is required for PAM])])
fi
@@ -1273,6 +1340,10 @@ if test "$with_openssl" = yes ; then
AC_CHECK_HEADER(openssl/err.h, [], [AC_MSG_ERROR([header file <openssl/err.h> is required for OpenSSL])])
fi
+if test "$with_icu" = yes ; then
+ AC_CHECK_HEADER(unicode/utypes.h, [], [AC_MSG_ERROR([header file <unicode/utypes.h> is required for ICU])])
+fi
+
if test "$with_pam" = yes ; then
AC_CHECK_HEADERS(security/pam_appl.h, [],
[AC_CHECK_HEADERS(pam/pam_appl.h, [],
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index bbd97dc..6d8886e 100644
--- src/backend/utils/adt/formatting.c
+++ src/backend/utils/adt/formatting.c
@@ -92,6 +92,12 @@
#include "utils/numeric.h"
#include "utils/pg_locale.h"
+#ifdef USE_ICU
+#define U_CHARSET_IS_UTF8 1
+#include <unicode/uchar.h>
+#include <unicode/ucasemap.h>
+#endif /* USE_ICU */
+
/* ----------
* Routines type
* ----------
@@ -940,6 +946,11 @@ typedef struct NUMProc
} NUMProc;
+#ifdef USE_ICU
+static UCaseMap *default_casemap = NULL; /* used for UTF-8 transcriptions */
+#endif /* USE_ICU */
+
+
/* ----------
* Functions
* ----------
@@ -1491,6 +1502,68 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
{
result = asc_tolower(buff, nbytes);
}
+#ifdef USE_ICU
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		/*
+		 * UTF-8 database: let ICU lower-case the UTF-8 bytes directly.
+		 */
+		int32_t		buflen;
+		UErrorCode	status = U_ZERO_ERROR;
+		UCaseMap   *casemap;
+
+		if (default_casemap == NULL)
+		{
+			default_casemap = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &status);
+			/* ICU status codes are not SQLSTATEs, so report a generic one */
+			if (U_FAILURE(status))
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						 errmsg("ICU error: formatting.c, could not get UCaseMap.")));
+		}
+
+		if (collid != DEFAULT_COLLATION_OID)
+		{
+			if (!OidIsValid(collid))
+			{
+				/*
+				 * This typically means that the parser could not resolve a
+				 * conflict of implicit collations, so report it that way.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_INDETERMINATE_COLLATION),
+						 errmsg("could not determine which collation to use for string comparison"),
+						 errhint("Use the COLLATE clause to set the collation explicitly.")));
+			}
+			casemap = pg_icu_casemap_from_collation(collid);
+			/* may be NULL if the collation's case map could not be opened */
+			if (casemap == NULL)
+				casemap = default_casemap;
+		}
+		else
+			casemap = default_casemap;
+
+		result = palloc(nbytes + 1);	/* add a byte for null termination */
+		buflen = ucasemap_utf8ToLower(casemap, result, nbytes + 1, buff, nbytes, &status);
+		/*
+		 * In some corner cases like Turkic `I', the result can be longer than
+		 * the source.  Accept running the conversion twice in these rare cases
+		 * (the first call reports the length needed) rather than wasting memory
+		 * or clock cycles trying to figure out the correct size up front.
+		 */
+		if ((size_t) buflen > nbytes)
+		{
+			pfree(result);
+			result = palloc(buflen + 1);
+			status = U_ZERO_ERROR;
+			buflen = ucasemap_utf8ToLower(casemap, result, buflen + 1, buff, nbytes, &status);
+		}
+		if (U_FAILURE(status))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("ICU error: Could not modify case")));
+	}
+#endif   /* USE_ICU */
#ifdef USE_WIDE_UPPER_LOWER
else if (pg_database_encoding_max_length() > 1)
{
@@ -1611,6 +1684,68 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
{
result = asc_toupper(buff, nbytes);
}
+#ifdef USE_ICU
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		/*
+		 * UTF-8 database: let ICU upper-case the UTF-8 bytes directly.
+		 */
+		int32_t		buflen;
+		UErrorCode	status = U_ZERO_ERROR;
+		UCaseMap   *casemap;
+
+		if (default_casemap == NULL)
+		{
+			default_casemap = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &status);
+			/* ICU status codes are not SQLSTATEs, so report a generic one */
+			if (U_FAILURE(status))
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						 errmsg("ICU error: formatting.c, could not get UCaseMap.")));
+		}
+
+		if (collid != DEFAULT_COLLATION_OID)
+		{
+			if (!OidIsValid(collid))
+			{
+				/*
+				 * This typically means that the parser could not resolve a
+				 * conflict of implicit collations, so report it that way.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_INDETERMINATE_COLLATION),
+						 errmsg("could not determine which collation to use for string comparison"),
+						 errhint("Use the COLLATE clause to set the collation explicitly.")));
+			}
+			casemap = pg_icu_casemap_from_collation(collid);
+			/* may be NULL if the collation's case map could not be opened */
+			if (casemap == NULL)
+				casemap = default_casemap;
+		}
+		else
+			casemap = default_casemap;
+
+		result = palloc(nbytes + 1);	/* add a byte for null termination */
+		buflen = ucasemap_utf8ToUpper(casemap, result, nbytes + 1, buff, nbytes, &status);
+		/*
+		 * In some corner cases like Turkic `I', the result can be longer than
+		 * the source.  Accept running the conversion twice in these rare cases
+		 * (the first call reports the length needed) rather than wasting memory
+		 * or clock cycles trying to figure out the correct size up front.
+		 */
+		if ((size_t) buflen > nbytes)
+		{
+			pfree(result);
+			result = palloc(buflen + 1);
+			status = U_ZERO_ERROR;
+			buflen = ucasemap_utf8ToUpper(casemap, result, buflen + 1, buff, nbytes, &status);
+		}
+		if (U_FAILURE(status))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("ICU error: Could not modify case")));
+	}
+#endif   /* USE_ICU */
#ifdef USE_WIDE_UPPER_LOWER
else if (pg_database_encoding_max_length() > 1)
{
@@ -1732,6 +1867,69 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
{
result = asc_initcap(buff, nbytes);
}
+#ifdef USE_ICU
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		/*
+		 * UTF-8 database: let ICU title-case the UTF-8 bytes directly.
+		 */
+		int32_t		buflen;
+		UErrorCode	status = U_ZERO_ERROR;
+		UCaseMap   *casemap;
+
+		if (default_casemap == NULL)
+		{
+			default_casemap = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &status);
+			/* ICU status codes are not SQLSTATEs, so report a generic one */
+			if (U_FAILURE(status))
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						 errmsg("ICU error: formatting.c, could not get UCaseMap.")));
+		}
+
+		if (collid != DEFAULT_COLLATION_OID)
+		{
+			if (!OidIsValid(collid))
+			{
+				/*
+				 * This typically means that the parser could not resolve a
+				 * conflict of implicit collations, so report it that way.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_INDETERMINATE_COLLATION),
+						 errmsg("could not determine which collation to use for string comparison"),
+						 errhint("Use the COLLATE clause to set the collation explicitly.")));
+			}
+			casemap = pg_icu_casemap_from_collation(collid);
+			/* may be NULL if the collation's case map could not be opened */
+			if (casemap == NULL)
+				casemap = default_casemap;
+		}
+		else
+			casemap = default_casemap;
+
+		result = palloc(nbytes + 1);	/* add a byte for null termination */
+		buflen = ucasemap_utf8ToTitle(casemap, result, nbytes + 1, buff, nbytes, &status);
+
+		/*
+		 * In some corner cases like Turkic `I', the result can be longer than
+		 * the source.  Accept running the conversion twice in these rare cases
+		 * (the first call reports the length needed) rather than wasting memory
+		 * or clock cycles trying to figure out the correct size up front.
+		 */
+		if ((size_t) buflen > nbytes)
+		{
+			pfree(result);
+			result = palloc(buflen + 1);
+			status = U_ZERO_ERROR;
+			buflen = ucasemap_utf8ToTitle(casemap, result, buflen + 1, buff, nbytes, &status);
+		}
+		if (U_FAILURE(status))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("ICU error: Could not modify case")));
+	}
+#endif   /* USE_ICU */
#ifdef USE_WIDE_UPPER_LOWER
else if (pg_database_encoding_max_length() > 1)
{
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index a818023..2c6e822 100644
--- src/backend/utils/adt/pg_locale.c
+++ src/backend/utils/adt/pg_locale.c
@@ -63,6 +63,10 @@
#include "utils/pg_locale.h"
#include "utils/syscache.h"
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
#ifdef WIN32
/*
* This Windows file defines StrNCpy. We don't need it here, so we undefine
@@ -118,6 +122,10 @@ typedef struct
bool ctype_is_c; /* is collation's LC_CTYPE C? */
bool flags_valid; /* true if above flags are valid */
pg_locale_t locale; /* locale_t struct, or 0 if not valid */
+#ifdef USE_ICU
+ UCollator *icu_collator;
+ UCaseMap *icu_casemap;
+#endif
} collation_cache_entry;
static HTAB *collation_cache = NULL;
@@ -1127,6 +1135,50 @@ report_newlocale_failure(const char *localename)
}
#endif /* HAVE_LOCALE_T */
+#ifdef USE_ICU
+/*
+ * Fetch the cache entry for a non-default collation, loading it through
+ * pg_newlocale_from_collation() when its locale has not been set up yet.
+ */
+static collation_cache_entry *
+pg_icu_cache_entry(Oid collid)
+{
+	collation_cache_entry *cache_entry = lookup_collation_cache(collid, false);
+
+	if (cache_entry->locale == 0)
+	{
+		pg_newlocale_from_collation(collid);
+		cache_entry = lookup_collation_cache(collid, false);
+	}
+	return cache_entry;
+}
+
+/*
+ * Return the cached ICU collator for a collation OID, or NULL for the
+ * database default collation.  Callers must pass a valid OID.
+ */
+UCollator *
+pg_icu_collator_from_collation(Oid collid)
+{
+	Assert(OidIsValid(collid));
+	if (collid == DEFAULT_COLLATION_OID)
+		return NULL;
+	return pg_icu_cache_entry(collid)->icu_collator;
+}
+
+/*
+ * Return the cached ICU case map for a collation OID, or NULL for the
+ * database default collation.  Callers must pass a valid OID.
+ */
+UCaseMap *
+pg_icu_casemap_from_collation(Oid collid)
+{
+	Assert(OidIsValid(collid));
+	if (collid == DEFAULT_COLLATION_OID)
+		return NULL;
+	return pg_icu_cache_entry(collid)->icu_casemap;
+}
+#endif
/*
* Create a locale_t from a collation OID. Results are cached for the
@@ -1176,6 +1228,26 @@ pg_newlocale_from_collation(Oid collid)
collcollate = NameStr(collform->collcollate);
collctype = NameStr(collform->collctype);
+#ifdef USE_ICU
+		{
+			UErrorCode	status = U_ZERO_ERROR;
+
+			/* ICU status codes are not SQLSTATEs; use the generic warning code */
+			cache_entry->icu_collator = ucol_open(collcollate, &status);
+			if (U_FAILURE(status))
+				ereport(WARNING,
+						(errcode(ERRCODE_WARNING),
+						 errmsg("ICU Error: pg_locale.c, could not open collator %s", collcollate)));
+
+			/* ICU calls do nothing once status holds a failure, so clear it */
+			status = U_ZERO_ERROR;
+			cache_entry->icu_casemap = ucasemap_open(collcollate, U_FOLD_CASE_DEFAULT, &status);
+			if (U_FAILURE(status))
+				ereport(WARNING,
+						(errcode(ERRCODE_WARNING),
+						 errmsg("ICU Error: pg_locale.c, could not open casemap %s", collcollate)));
+		}
+#endif
if (strcmp(collcollate, collctype) == 0)
{
/* Normal case where they're the same */
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index bf7c0cd..c67240d 100644
--- src/backend/utils/adt/varlena.c
+++ src/backend/utils/adt/varlena.c
@@ -35,6 +35,18 @@
#include "utils/pg_locale.h"
#include "utils/sortsupport.h"
+#ifdef USE_ICU
+#define U_CHARSET_IS_UTF8 1
+#include <unicode/uchar.h>
+#include <unicode/ucasemap.h>
+#include <unicode/utypes.h> /* Basic ICU data types */
+#include <unicode/ucnv.h> /* C Converter API */
+#include <unicode/ucol.h>
+#include <unicode/uloc.h>
+#include "unicode/uiter.h"
+static UCollator *default_collator = NULL;
+#endif /* USE_ICU */
+
/* GUC variable */
int bytea_output = BYTEA_OUTPUT_HEX;
@@ -75,6 +87,9 @@ typedef struct
#ifdef HAVE_LOCALE_T
pg_locale_t locale;
#endif
+#ifdef USE_ICU
+ UCollator *icu_collator;
+#endif
} VarStringSortSupport;
/*
@@ -1396,6 +1411,94 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
}
+
+ else if (collid != DEFAULT_COLLATION_OID && !OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string comparison"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+ /*
+ * memcmp() can't tell us which of two unequal strings sorts first,
+ * but it's a cheap way to tell if they're equal. Testing shows that
+ * memcmp() followed by strcoll() is only trivially slower than
+ * strcoll() by itself, so we don't lose much if this doesn't work out
+ * very often, and if it does - for example, because there are many
+ * equal strings in the input - then we win big by avoiding expensive
+ * collation-aware comparisons.
+ */
+ else if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
+ result = 0;
+
+#ifdef USE_ICU
+
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		UCollator  *collator;
+		UErrorCode	status = U_ZERO_ERROR;
+		UCharIterator sIter, tIter;
+
+		/*
+		 * We keep a static default collator "forever" per session, since it
+		 * is hard coded into the database cluster at initdb time anyway.  We
+		 * create it first time we get here.
+		 */
+		if (default_collator == NULL)
+		{
+			/*
+			 * Expect LC_COLLATE to be set to something that ICU will understand.
+			 * This is quite probable, since ICU does a lot of heuristics with this
+			 * argument.  I'd rather set this in xlog.c, but it seems ICU forgets it???
+			 */
+			uloc_setDefault(setlocale(LC_COLLATE, NULL), &status);
+			if (U_FAILURE(status))
+				ereport(WARNING,
+						(errcode(ERRCODE_WARNING),
+						 errmsg("ICU Error: varlena.c, could not set default lc_collate")));
+			/* ICU calls do nothing once status holds a failure, so clear it */
+			status = U_ZERO_ERROR;
+			default_collator = ucol_open(NULL, &status);
+			if (U_FAILURE(status))
+				ereport(WARNING,
+						(errcode(ERRCODE_WARNING),
+						 errmsg("ICU Error: varlena.c, could not open collator")));
+		}
+
+		if (collid != DEFAULT_COLLATION_OID)
+			collator = pg_icu_collator_from_collation(collid);
+		else
+			collator = default_collator;
+
+		/* start the comparison itself from a clean status */
+		status = U_ZERO_ERROR;
+		uiter_setUTF8(&sIter, arg1, len1);
+		uiter_setUTF8(&tIter, arg2, len2);
+		result = ucol_strcollIter(collator, &sIter, &tIter, &status);
+		if (U_FAILURE(status))
+			ereport(WARNING,
+					(errcode(ERRCODE_WARNING),
+					 errmsg("ICU Error: varlena.c, could not collate")));
+		/*
+		 * In some locales wcscoll() can claim that nonidentical strings are
+		 * equal.  Believing that this might be so also for ICU, and believing
+		 * that would be bad news for a number of reasons, we follow Perl's
+		 * lead and sort "equal" strings according to strcmp (on the byte
+		 * representation).
+		 */
+		if (result == 0)
+		{
+			result = strncmp(arg1, arg2, Min(len1, len2));
+			if ((result == 0) && (len1 != len2))
+				result = (len1 < len2) ? -1 : 1;
+		}
+	}
+#endif   /* USE_ICU */
+
else
{
char a1buf[TEXTBUFLEN];
@@ -1409,34 +1512,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
if (collid != DEFAULT_COLLATION_OID)
{
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for string comparison"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
#ifdef HAVE_LOCALE_T
mylocale = pg_newlocale_from_collation(collid);
#endif
}
- /*
- * memcmp() can't tell us which of two unequal strings sorts first,
- * but it's a cheap way to tell if they're equal. Testing shows that
- * memcmp() followed by strcoll() is only trivially slower than
- * strcoll() by itself, so we don't lose much if this doesn't work out
- * very often, and if it does - for example, because there are many
- * equal strings in the input - then we win big by avoiding expensive
- * collation-aware comparisons.
- */
- if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
- return 0;
-
#ifdef WIN32
/* Win32 does not have UTF-8, so we need to map to UTF-16 */
if (GetDatabaseEncoding() == PG_UTF8)
@@ -1771,6 +1851,9 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
#ifdef HAVE_LOCALE_T
pg_locale_t locale = 0;
#endif
+#ifdef USE_ICU
+ UCollator *icu_collator = NULL;
+#endif
/*
* If possible, set ssup->comparator to a function which can be used to
@@ -1828,6 +1911,37 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
#ifdef HAVE_LOCALE_T
locale = pg_newlocale_from_collation(collid);
#endif
+#ifdef USE_ICU
+			/* ICU collators are only used when the database is UTF-8 */
+			if (GetDatabaseEncoding() == PG_UTF8)
+				icu_collator = pg_icu_collator_from_collation(collid);
+		}
+		else if (GetDatabaseEncoding() == PG_UTF8)
+		{
+			/*
+			 * We keep a static default collator "forever" per session, as per
+			 * discussion in varstr_cmp().
+			 */
+			if (default_collator == NULL)
+			{
+				UErrorCode	status = U_ZERO_ERROR;
+
+				uloc_setDefault(setlocale(LC_COLLATE, NULL), &status);
+				if (U_FAILURE(status))
+					ereport(WARNING,
+							(errcode(ERRCODE_WARNING),
+							 errmsg("ICU Error: varlena.c, could not set default lc_collate")));
+
+				/* ICU calls do nothing once status holds a failure, so clear it */
+				status = U_ZERO_ERROR;
+				default_collator = ucol_open(NULL, &status);
+				if (U_FAILURE(status))
+					ereport(WARNING,
+							(errcode(ERRCODE_WARNING),
+							 errmsg("ICU Error: varlena.c, could not open collator")));
+			}
+			icu_collator = default_collator;
+#endif
}
}
@@ -1879,6 +1993,9 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
#ifdef HAVE_LOCALE_T
sss->locale = locale;
#endif
+#ifdef USE_ICU
+ sss->icu_collator = icu_collator;
+#endif
/*
* To avoid somehow confusing a strxfrm() blob and an original string,
@@ -2089,6 +2206,23 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
goto done;
}
+#ifdef USE_ICU
+	if (GetDatabaseEncoding() == PG_UTF8 && sss->icu_collator)
+	{
+		UErrorCode	status = U_ZERO_ERROR;
+		UCharIterator sIter, tIter;
+
+		uiter_setUTF8(&sIter, a1p, len1);
+		uiter_setUTF8(&tIter, a2p, len2);
+		result = ucol_strcollIter(sss->icu_collator, &sIter, &tIter, &status);
+		/* ICU status codes are not SQLSTATEs; use the generic warning code */
+		if (U_FAILURE(status))
+			ereport(WARNING,
+					(errcode(ERRCODE_WARNING),
+					 errmsg("ICU Error: varlena.c, could not collate")));
+	}
+	else
+#endif
#ifdef HAVE_LOCALE_T
if (sss->locale)
result = strcoll_l(sss->buf1, sss->buf2, sss->locale);
diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c
index 11099b8..d411f45 100644
--- src/backend/utils/mb/encnames.c
+++ src/backend/utils/mb/encnames.c
@@ -403,6 +403,118 @@ const pg_enc2gettext pg_enc2gettext_tbl[] =
};
+#ifdef USE_ICU
+/*
+ * Map internal character encodings to the preferred IANA name.  The table
+ * is indexed by encoding ID (mbutils.c passes pg_enc2iananame_tbl[encoding]
+ * to ICU), so each entry is pinned to its own ID; unlisted IDs stay NULL.
+ *
+ * Palle Girgensohn, 2005
+ */
+
+pg_enc2name pg_enc2iananame_tbl[] =
+{
+	[PG_SQL_ASCII] = {
+		"US-ASCII", PG_SQL_ASCII
+	},
+	[PG_EUC_JP] = {
+		"EUC-JP", PG_EUC_JP
+	},
+	[PG_EUC_CN] = {
+		"GB2312", PG_EUC_CN
+	},
+	[PG_EUC_KR] = {
+		"EUC-KR", PG_EUC_KR
+	},
+	[PG_EUC_TW] = {
+		"ISO-2022-CN", PG_EUC_TW
+	},
+	[PG_JOHAB] = {
+		"KS_C_5601-1987", PG_JOHAB	/* either KS_C_5601-1987 or ISO-2022-KR ??? */
+	},
+	[PG_UTF8] = {
+		"UTF-8", PG_UTF8
+	},
+	[PG_MULE_INTERNAL] = {
+		"MULE_INTERNAL", PG_MULE_INTERNAL	/* is not for real */
+	},
+	[PG_LATIN1] = {
+		"ISO-8859-1", PG_LATIN1
+	},
+	[PG_LATIN2] = {
+		"ISO-8859-2", PG_LATIN2
+	},
+	[PG_LATIN3] = {
+		"ISO-8859-3", PG_LATIN3
+	},
+	[PG_LATIN4] = {
+		"ISO-8859-4", PG_LATIN4
+	},
+	[PG_LATIN5] = {
+		"ISO-8859-9", PG_LATIN5
+	},
+	[PG_LATIN6] = {
+		"ISO-8859-10", PG_LATIN6
+	},
+	[PG_LATIN7] = {
+		"ISO-8859-13", PG_LATIN7
+	},
+	[PG_LATIN8] = {
+		"ISO-8859-14", PG_LATIN8
+	},
+	[PG_LATIN9] = {
+		"ISO-8859-15", PG_LATIN9
+	},
+	[PG_LATIN10] = {
+		"ISO-8859-16", PG_LATIN10
+	},
+	[PG_WIN1256] = {
+		"windows-1256", PG_WIN1256
+	},
+	[PG_WIN874] = {
+		"windows-874", PG_WIN874
+	},
+	[PG_KOI8R] = {
+		"KOI8-R", PG_KOI8R
+	},
+	[PG_WIN1251] = {
+		"windows-1251", PG_WIN1251
+	},
+	[PG_ISO_8859_5] = {
+		"ISO-8859-5", PG_ISO_8859_5
+	},
+	[PG_ISO_8859_6] = {
+		"ISO-8859-6", PG_ISO_8859_6
+	},
+	[PG_ISO_8859_7] = {
+		"ISO-8859-7", PG_ISO_8859_7
+	},
+	[PG_ISO_8859_8] = {
+		"ISO-8859-8", PG_ISO_8859_8
+	},
+	[PG_WIN1250] = {
+		"windows-1250", PG_WIN1250
+	},
+	[PG_SJIS] = {
+		"Shift_JIS", PG_SJIS
+	},
+	[PG_BIG5] = {
+		"Big5", PG_BIG5
+	},
+	[PG_GBK] = {
+		"GBK", PG_GBK
+	},
+	[PG_UHC] = {
+		"cp949", PG_UHC
+	},
+	[PG_GB18030] = {
+		"GB18030", PG_GB18030
+	}
+};
+#endif   /* USE_ICU */
+
+
+
/* ----------
* Encoding checks, for error returns -1 else encoding id
* ----------
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 7f1c881..7b7bc01 100644
--- src/backend/utils/mb/mbutils.c
+++ src/backend/utils/mb/mbutils.c
@@ -40,6 +40,10 @@
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
+#ifdef USE_ICU
+#define U_CHARSET_IS_UTF8 1
+#include <unicode/ucnv.h>
+#endif /* USE_ICU */
/*
* When converting strings between different encodings, we assume that space
@@ -913,6 +917,9 @@ SetDatabaseEncoding(int encoding)
DatabaseEncoding = &pg_enc2name_tbl[encoding];
Assert(DatabaseEncoding->encoding == encoding);
+#ifdef USE_ICU
+ ucnv_setDefaultName((&pg_enc2iananame_tbl[encoding])->name);
+#endif
}
void
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 24e8d0d..11da225 100644
--- src/include/mb/pg_wchar.h
+++ src/include/mb/pg_wchar.h
@@ -321,6 +321,10 @@ typedef struct pg_enc2name
extern const pg_enc2name pg_enc2name_tbl[];
+#ifdef USE_ICU
+extern pg_enc2name pg_enc2iananame_tbl[];
+#endif
+
/*
* Encoding names for gettext
*/
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index b621ff2..de58917 100644
--- src/include/pg_config.h.in
+++ src/include/pg_config.h.in
@@ -288,6 +288,12 @@
/* Define to 1 if you have the `crypto' library (-lcrypto). */
#undef HAVE_LIBCRYPTO
+/* Define to 1 if you have the `icui18n' library (-licui18n). */
+#undef HAVE_LIBICUI18N
+
+/* Define to 1 if you have the `icuuc' library (-licuuc). */
+#undef HAVE_LIBICUUC
+
/* Define to 1 if you have the `ldap' library (-lldap). */
#undef HAVE_LIBLDAP
@@ -796,6 +802,9 @@
/* Define to 1 to build with BSD Authentication support. (--with-bsd-auth) */
#undef USE_BSD_AUTH
+/* Define to build with ICU support. (--with-icu) */
+#undef USE_ICU
+
/* Define to 1 if you want float4 values to be passed by value.
(--enable-float4-byval) */
#undef USE_FLOAT4_BYVAL
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 0a4b9f7..d750536 100644
--- src/include/utils/pg_locale.h
+++ src/include/utils/pg_locale.h
@@ -19,6 +19,12 @@
#include "utils/guc.h"
+#ifdef USE_ICU
+#define U_CHARSET_IS_UTF8 1
+#include <unicode/uchar.h>
+#include <unicode/ucasemap.h>
+#include <unicode/ucol.h>
+#endif
/* GUC settings */
extern char *locale_messages;
@@ -71,6 +77,10 @@ typedef locale_t pg_locale_t;
typedef int pg_locale_t;
#endif
+#ifdef USE_ICU
+extern UCollator * pg_icu_collator_from_collation(Oid collid);
+extern UCaseMap * pg_icu_casemap_from_collation(Oid collid);
+#endif
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */