bsdports/data/postgresql96/files/patch-ICU-pg-96b4-icu-2016-08-10.diff

diff --git a/.gitignore b/.gitignore
index cbf8d79..8218549 100644
--- .gitignore
+++ .gitignore
@@ -38,3 +38,5 @@ lib*.pc
 /Debug/
 /Release/
 /tmp_install/
+
+/configure
diff --git a/configure.in b/configure.in
index 598fbd8..b83545c 100644
--- configure.in
+++ configure.in
@@ -730,6 +730,16 @@ AC_SUBST(with_systemd)
 AC_MSG_RESULT([$with_systemd])

 #
+# ICU
+#
+AC_MSG_CHECKING([whether to build with ICU support])
+PGAC_ARG_BOOL(with, icu, no, [  --with-icu              build with ICU support],
+              [AC_DEFINE([USE_ICU], 1, [Define to build with ICU support. (--with-icu)])])
+AC_MSG_RESULT([$with_icu])
+AC_SUBST(with_icu)
+
+
+#
 # Readline
 #
 PGAC_ARG_BOOL(with, readline, yes,
@@ -1120,6 +1130,63 @@ if test "$with_openssl" = yes ; then
   AC_CHECK_FUNCS([SSL_get_current_compression])
 fi

+if test "$with_icu" = yes ; then
+ AC_CHECK_LIB(icui18n, ucol_open_57, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_56, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_55, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_54, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_53, [], [
+ AC_CHECK_LIB(icui18n, ucol_open_52, [], [
+  AC_CHECK_LIB(icui18n, ucol_open_50, [], [
+   AC_CHECK_LIB(icui18n, ucol_open_48, [], [
+    AC_CHECK_LIB(icui18n, ucol_open_46, [], [
+      AC_CHECK_LIB(icui18n, ucol_open_44, [], [
+        AC_CHECK_LIB(icui18n, ucol_open_43, [], [
+          AC_CHECK_LIB(icui18n, ucol_open_3_8, [], [
+            AC_CHECK_LIB(icui18n, ucol_open_3_6, [], [
+              AC_CHECK_LIB(icui18n, ucol_open, [], [AC_MSG_ERROR([library 'icui18n' is required for ICU])])
+            ])
+          ])
+        ])
+      ])
+    ])
+   ])
+  ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_57, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_56, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_55, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_54, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_53, [], [
+ AC_CHECK_LIB(icuuc, ucnv_fromUChars_52, [], [
+  AC_CHECK_LIB(icuuc, ucnv_fromUChars_50, [], [
+   AC_CHECK_LIB(icuuc, ucnv_fromUChars_48, [], [
+    AC_CHECK_LIB(icuuc, ucnv_fromUChars_46, [], [
+      AC_CHECK_LIB(icuuc, ucnv_fromUChars_44, [], [
+        AC_CHECK_LIB(icuuc, ucnv_fromUChars_43, [], [
+          AC_CHECK_LIB(icuuc, ucnv_fromUChars_3_8, [], [
+            AC_CHECK_LIB(icuuc, ucnv_fromUChars_3_6, [], [
+              AC_CHECK_LIB(icuuc, ucnv_fromUChars, [], [AC_MSG_ERROR([library 'icuuc' is required for ICU])])
+            ])
+          ])
+        ])
+      ])
+    ])
+   ])
+  ])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ])
+fi
+
 if test "$with_pam" = yes ; then
   AC_CHECK_LIB(pam,    pam_start, [], [AC_MSG_ERROR([library 'pam' is required for PAM])])
 fi
@@ -1273,6 +1340,10 @@ if test "$with_openssl" = yes ; then
   AC_CHECK_HEADER(openssl/err.h, [], [AC_MSG_ERROR([header file <openssl/err.h> is required for OpenSSL])])
 fi

+if test "$with_icu" = yes ; then
+  AC_CHECK_HEADER(unicode/utypes.h, [], [AC_MSG_ERROR([header file <unicode/utypes.h> is required for ICU])])
+fi
+
 if test "$with_pam" = yes ; then
   AC_CHECK_HEADERS(security/pam_appl.h, [],
                    [AC_CHECK_HEADERS(pam/pam_appl.h, [],
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index bbd97dc..6d8886e 100644
--- src/backend/utils/adt/formatting.c
+++ src/backend/utils/adt/formatting.c
@@ -92,6 +92,12 @@
 #include "utils/numeric.h"
 #include "utils/pg_locale.h"

+#ifdef USE_ICU
+#define U_CHARSET_IS_UTF8 1
+#include <unicode/uchar.h>
+#include <unicode/ucasemap.h>
+#endif /* USE_ICU */
+
 /* ----------
  * Routines type
  * ----------
@@ -940,6 +946,11 @@ typedef struct NUMProc
 } NUMProc;


+#ifdef USE_ICU
+static UCaseMap *default_casemap = NULL; /* used for UTF-8 transcriptions */
+#endif   /* USE_ICU */
+
+
 /* ----------
  * Functions
  * ----------
@@ -1491,6 +1502,68 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 	{
 		result = asc_tolower(buff, nbytes);
 	}
+#ifdef USE_ICU
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		/* UTF-8 fast path: let ICU case-map the UTF-8 bytes directly. */
+		int32_t		buflen;
+		UErrorCode	status = U_ZERO_ERROR;
+		UCaseMap   *casemap;
+
+		/* The default-locale case map is opened once and kept around. */
+		if (default_casemap == NULL)
+		{
+			default_casemap = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &status);
+			/* A UErrorCode is not a SQLSTATE, so report an internal error. */
+			if (U_FAILURE(status))
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						 errmsg("ICU error: could not open default UCaseMap: %s",
+								u_errorName(status))));
+		}
+		casemap = default_casemap;
+
+		if (collid != DEFAULT_COLLATION_OID)
+		{
+			if (!OidIsValid(collid))
+			{
+				/*
+				 * This typically means that the parser could not resolve a
+				 * conflict of implicit collations, so report it that way.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_INDETERMINATE_COLLATION),
+						 errmsg("could not determine which collation to use for lower() function"),
+						 errhint("Use the COLLATE clause to set the collation explicitly.")));
+			}
+			casemap = pg_icu_casemap_from_collation(collid);
+			/* NULL: the case map could not be opened; do not crash in ICU. */
+			if (casemap == NULL)
+				casemap = default_casemap;
+		}
+
+		/* First attempt: room for the input length plus a terminating NUL. */
+		result = palloc(nbytes + 1);
+		buflen = ucasemap_utf8ToLower(casemap, result, nbytes + 1, buff, nbytes, &status);
+
+		/*
+		 * Case mapping can lengthen the string (e.g. Turkic dotted I).  ICU
+		 * still returns the length it needs, so retry once with that size.
+		 */
+		if (buflen > 0 && (size_t) buflen > nbytes)
+		{
+			pfree(result);
+			result = palloc((size_t) buflen + 1);
+			status = U_ZERO_ERROR;
+			buflen = ucasemap_utf8ToLower(casemap, result, buflen + 1, buff, nbytes, &status);
+		}
+		if (U_FAILURE(status))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("ICU error: could not modify case: %s",
+							u_errorName(status))));
+	}
+#endif /* USE_ICU */
 #ifdef USE_WIDE_UPPER_LOWER
 	else if (pg_database_encoding_max_length() > 1)
 	{
@@ -1611,6 +1684,68 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 	{
 		result = asc_toupper(buff, nbytes);
 	}
+#ifdef USE_ICU
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		/* UTF-8 fast path: let ICU case-map the UTF-8 bytes directly. */
+		int32_t		buflen;
+		UErrorCode	status = U_ZERO_ERROR;
+		UCaseMap   *casemap;
+
+		/* The default-locale case map is opened once and kept around. */
+		if (default_casemap == NULL)
+		{
+			default_casemap = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &status);
+			/* A UErrorCode is not a SQLSTATE, so report an internal error. */
+			if (U_FAILURE(status))
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						 errmsg("ICU error: could not open default UCaseMap: %s",
+								u_errorName(status))));
+		}
+		casemap = default_casemap;
+
+		if (collid != DEFAULT_COLLATION_OID)
+		{
+			if (!OidIsValid(collid))
+			{
+				/*
+				 * This typically means that the parser could not resolve a
+				 * conflict of implicit collations, so report it that way.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_INDETERMINATE_COLLATION),
+						 errmsg("could not determine which collation to use for upper() function"),
+						 errhint("Use the COLLATE clause to set the collation explicitly.")));
+			}
+			casemap = pg_icu_casemap_from_collation(collid);
+			/* NULL: the case map could not be opened; do not crash in ICU. */
+			if (casemap == NULL)
+				casemap = default_casemap;
+		}
+
+		/* First attempt: room for the input length plus a terminating NUL. */
+		result = palloc(nbytes + 1);
+		buflen = ucasemap_utf8ToUpper(casemap, result, nbytes + 1, buff, nbytes, &status);
+
+		/*
+		 * Case mapping can lengthen the string (e.g. German sharp s).  ICU
+		 * still returns the length it needs, so retry once with that size.
+		 */
+		if (buflen > 0 && (size_t) buflen > nbytes)
+		{
+			pfree(result);
+			result = palloc((size_t) buflen + 1);
+			status = U_ZERO_ERROR;
+			buflen = ucasemap_utf8ToUpper(casemap, result, buflen + 1, buff, nbytes, &status);
+		}
+		if (U_FAILURE(status))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("ICU error: could not modify case: %s",
+							u_errorName(status))));
+	}
+#endif /* USE_ICU */
 #ifdef USE_WIDE_UPPER_LOWER
 	else if (pg_database_encoding_max_length() > 1)
 	{
@@ -1732,6 +1867,69 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 	{
 		result = asc_initcap(buff, nbytes);
 	}
+#ifdef USE_ICU
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		/* UTF-8 fast path: let ICU case-map the UTF-8 bytes directly. */
+		int32_t		buflen;
+		UErrorCode	status = U_ZERO_ERROR;
+		UCaseMap   *casemap;
+
+		/* The default-locale case map is opened once and kept around. */
+		if (default_casemap == NULL)
+		{
+			default_casemap = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &status);
+			/* A UErrorCode is not a SQLSTATE, so report an internal error. */
+			if (U_FAILURE(status))
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						 errmsg("ICU error: could not open default UCaseMap: %s",
+								u_errorName(status))));
+		}
+		casemap = default_casemap;
+
+		if (collid != DEFAULT_COLLATION_OID)
+		{
+			if (!OidIsValid(collid))
+			{
+				/*
+				 * This typically means that the parser could not resolve a
+				 * conflict of implicit collations, so report it that way.
+				 */
+				ereport(ERROR,
+						(errcode(ERRCODE_INDETERMINATE_COLLATION),
+						 errmsg("could not determine which collation to use for initcap() function"),
+						 errhint("Use the COLLATE clause to set the collation explicitly.")));
+			}
+			casemap = pg_icu_casemap_from_collation(collid);
+			/* NULL: the case map could not be opened; do not crash in ICU. */
+			if (casemap == NULL)
+				casemap = default_casemap;
+		}
+
+		/* First attempt: room for the input length plus a terminating NUL. */
+		result = palloc(nbytes + 1);
+		buflen = ucasemap_utf8ToTitle(casemap, result, nbytes + 1, buff, nbytes, &status);
+
+		/*
+		 * Case mapping can lengthen the string (e.g. Turkic dotted I).  ICU
+		 * still returns the length it needs, so retry once with that size.
+		 */
+		if (buflen > 0 && (size_t) buflen > nbytes)
+		{
+			pfree(result);
+			result = palloc((size_t) buflen + 1);
+			status = U_ZERO_ERROR;
+			buflen = ucasemap_utf8ToTitle(casemap, result, buflen + 1, buff, nbytes, &status);
+		}
+
+		if (U_FAILURE(status))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errmsg("ICU error: could not modify case: %s",
+							u_errorName(status))));
+	}
+#endif /* USE_ICU */
 #ifdef USE_WIDE_UPPER_LOWER
 	else if (pg_database_encoding_max_length() > 1)
 	{
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index a818023..2c6e822 100644
--- src/backend/utils/adt/pg_locale.c
+++ src/backend/utils/adt/pg_locale.c
@@ -63,6 +63,10 @@
 #include "utils/pg_locale.h"
 #include "utils/syscache.h"

+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
 #ifdef WIN32
 /*
  * This Windows file defines StrNCpy. We don't need it here, so we undefine
@@ -118,6 +122,10 @@ typedef struct
 	bool		ctype_is_c;		/* is collation's LC_CTYPE C? */
 	bool		flags_valid;	/* true if above flags are valid */
 	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
+#ifdef USE_ICU
+	UCollator  *icu_collator;
+	UCaseMap   *icu_casemap;
+#endif
 } collation_cache_entry;

 static HTAB *collation_cache = NULL;
@@ -1127,6 +1135,50 @@ report_newlocale_failure(const char *localename)
 }
 #endif   /* HAVE_LOCALE_T */

+#ifdef USE_ICU
+UCollator *
+pg_icu_collator_from_collation(Oid collid)
+{
+	collation_cache_entry *entry;
+
+	/* Passing an invalid OID is a caller bug. */
+	Assert(OidIsValid(collid));
+
+	/* Callers should handle the default collation; give NULL if one forgot. */
+	if (collid == DEFAULT_COLLATION_OID)
+		return NULL;
+
+	/* Already populated?  Then the cached collator can be returned as is. */
+	entry = lookup_collation_cache(collid, false);
+	if (entry->locale != 0)
+		return entry->icu_collator;
+
+	/* First use: build the entry (this opens the collator), then re-read. */
+	pg_newlocale_from_collation(collid);
+	return lookup_collation_cache(collid, false)->icu_collator;
+}
+
+UCaseMap *
+pg_icu_casemap_from_collation(Oid collid)
+{
+	collation_cache_entry *entry;
+
+	/* Passing an invalid OID is a caller bug. */
+	Assert(OidIsValid(collid));
+
+	/* Callers should handle the default collation; give NULL if one forgot. */
+	if (collid == DEFAULT_COLLATION_OID)
+		return NULL;
+
+	/* Already populated?  Then the cached case map can be returned as is. */
+	entry = lookup_collation_cache(collid, false);
+	if (entry->locale != 0)
+		return entry->icu_casemap;
+	/* First use: build the entry (this opens the case map), then re-read. */
+	pg_newlocale_from_collation(collid);
+	return lookup_collation_cache(collid, false)->icu_casemap;
+}
+#endif

 /*
  * Create a locale_t from a collation OID.  Results are cached for the
@@ -1176,6 +1228,26 @@ pg_newlocale_from_collation(Oid collid)
 		collcollate = NameStr(collform->collcollate);
 		collctype = NameStr(collform->collctype);

+#ifdef USE_ICU
+		/*
+		 * Open the ICU collator and case map for this collation and cache
+		 * whatever ICU returns; failures only raise a WARNING.  status is
+		 * reset in between: ICU calls are no-ops while a failure is set.
+		 */
+		UErrorCode  status = U_ZERO_ERROR;
+
+		cache_entry->icu_collator = ucol_open(collcollate, &status);
+		if (U_FAILURE(status))
+			ereport(WARNING,
+					(errcode(ERRCODE_WARNING),
+					 errmsg("ICU Error: pg_locale.c, could not open collator %s", collcollate)));
+		status = U_ZERO_ERROR;
+		cache_entry->icu_casemap = ucasemap_open(collcollate, U_FOLD_CASE_DEFAULT, &status);
+		if (U_FAILURE(status))
+			ereport(WARNING,
+					(errcode(ERRCODE_WARNING),
+					 errmsg("ICU Error: pg_locale.c, could not open casemap %s", collcollate)));
+#endif
 		if (strcmp(collcollate, collctype) == 0)
 		{
 			/* Normal case where they're the same */
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index bf7c0cd..c67240d 100644
--- src/backend/utils/adt/varlena.c
+++ src/backend/utils/adt/varlena.c
@@ -35,6 +35,18 @@
 #include "utils/pg_locale.h"
 #include "utils/sortsupport.h"

+#ifdef USE_ICU
+#define U_CHARSET_IS_UTF8 1
+#include <unicode/uchar.h>
+#include <unicode/ucasemap.h>
+#include <unicode/utypes.h>   /* Basic ICU data types */
+#include <unicode/ucnv.h>     /* C   Converter API    */
+#include <unicode/ucol.h>
+#include <unicode/uloc.h>
+#include "unicode/uiter.h"
+static UCollator *default_collator = NULL;
+#endif /* USE_ICU */
+

 /* GUC variable */
 int			bytea_output = BYTEA_OUTPUT_HEX;
@@ -75,6 +87,9 @@ typedef struct
 #ifdef HAVE_LOCALE_T
 	pg_locale_t locale;
 #endif
+#ifdef USE_ICU
+	UCollator  *icu_collator;
+#endif
 } VarStringSortSupport;

 /*
@@ -1396,6 +1411,94 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
 		if ((result == 0) && (len1 != len2))
 			result = (len1 < len2) ? -1 : 1;
 	}
+
+	else if (collid != DEFAULT_COLLATION_OID && !OidIsValid(collid))
+	{
+		/*
+		 * This typically means that the parser could not resolve a
+		 * conflict of implicit collations, so report it that way.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_INDETERMINATE_COLLATION),
+				 errmsg("could not determine which collation to use for string comparison"),
+				 errhint("Use the COLLATE clause to set the collation explicitly.")));
+	}
+	/*
+	 * memcmp() can't tell us which of two unequal strings sorts first,
+	 * but it's a cheap way to tell if they're equal.  Testing shows that
+	 * memcmp() followed by strcoll() is only trivially slower than
+	 * strcoll() by itself, so we don't lose much if this doesn't work out
+	 * very often, and if it does - for example, because there are many
+	 * equal strings in the input - then we win big by avoiding expensive
+	 * collation-aware comparisons.
+	 */
+	else if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
+			result = 0;
+
+#ifdef USE_ICU
+
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		UCollator  *collator;
+		UErrorCode	status = U_ZERO_ERROR;
+		UCharIterator sIter, tIter;
+
+		/*
+		 * We keep a static default collator "forever" per session, since it
+		 * is hard coded into the database cluster at initdb time anyway.  We
+		 * create it first time we get here.
+		 */
+		if (default_collator == NULL)
+		{
+			/*
+			 * Expect LC_COLLATE to be something ICU understands.  status is
+			 * reset after a failure: ICU calls are no-ops while it is set.
+			 */
+			uloc_setDefault(setlocale(LC_COLLATE, NULL), &status);
+			if (U_FAILURE(status))
+			{
+				ereport(WARNING,
+						(errcode(ERRCODE_WARNING),
+						 errmsg("ICU Error: varlena.c, could not set default lc_collate")));
+				status = U_ZERO_ERROR;
+			}
+			default_collator = ucol_open(NULL, &status);
+			if (U_FAILURE(status))
+			{
+				ereport(WARNING,
+						(errcode(ERRCODE_WARNING),
+						 errmsg("ICU Error: varlena.c, could not open collator")));
+				status = U_ZERO_ERROR;
+			}
+		}
+
+		if (collid != DEFAULT_COLLATION_OID)
+			collator = pg_icu_collator_from_collation(collid);
+		else
+			collator = default_collator;
+
+		uiter_setUTF8(&sIter, arg1, len1);
+		uiter_setUTF8(&tIter, arg2, len2);
+		result = ucol_strcollIter(collator, &sIter, &tIter, &status);
+		if (U_FAILURE(status))
+		{
+			ereport(WARNING,
+					(errcode(ERRCODE_WARNING),
+					 errmsg("ICU Error: varlena.c, could not collate")));
+		}
+		/*
+		 * ICU might, like wcscoll(), report nonidentical strings as equal.
+		 * Follow Perl's lead and order such strings by their bytes.
+		 */
+		if (result == 0)
+		{
+			result = strncmp(arg1, arg2, Min(len1, len2));
+			if ((result == 0) && (len1 != len2))
+				result = (len1 < len2) ? -1 : 1;
+		}
+	}
+#endif /* USE_ICU */
+
 	else
 	{
 		char		a1buf[TEXTBUFLEN];
@@ -1409,34 +1512,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)

 		if (collid != DEFAULT_COLLATION_OID)
 		{
-			if (!OidIsValid(collid))
-			{
-				/*
-				 * This typically means that the parser could not resolve a
-				 * conflict of implicit collations, so report it that way.
-				 */
-				ereport(ERROR,
-						(errcode(ERRCODE_INDETERMINATE_COLLATION),
-						 errmsg("could not determine which collation to use for string comparison"),
-						 errhint("Use the COLLATE clause to set the collation explicitly.")));
-			}
 #ifdef HAVE_LOCALE_T
 			mylocale = pg_newlocale_from_collation(collid);
 #endif
 		}

-		/*
-		 * memcmp() can't tell us which of two unequal strings sorts first,
-		 * but it's a cheap way to tell if they're equal.  Testing shows that
-		 * memcmp() followed by strcoll() is only trivially slower than
-		 * strcoll() by itself, so we don't lose much if this doesn't work out
-		 * very often, and if it does - for example, because there are many
-		 * equal strings in the input - then we win big by avoiding expensive
-		 * collation-aware comparisons.
-		 */
-		if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
-			return 0;
-
 #ifdef WIN32
 		/* Win32 does not have UTF-8, so we need to map to UTF-16 */
 		if (GetDatabaseEncoding() == PG_UTF8)
@@ -1771,6 +1851,9 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
 #ifdef HAVE_LOCALE_T
 	pg_locale_t locale = 0;
 #endif
+#ifdef USE_ICU
+	UCollator  *icu_collator = NULL;
+#endif

 	/*
 	 * If possible, set ssup->comparator to a function which can be used to
@@ -1828,6 +1911,37 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
 #ifdef HAVE_LOCALE_T
 			locale = pg_newlocale_from_collation(collid);
 #endif
+#ifdef USE_ICU
+			if (GetDatabaseEncoding() == PG_UTF8)
+				icu_collator = pg_icu_collator_from_collation(collid);
+		}
+		else if (GetDatabaseEncoding() == PG_UTF8)
+		{
+			/*
+			 * We keep a static default collator "forever" per session, as
+			 * per discussion in varstr_cmp().
+			 */
+			if (default_collator == NULL)
+			{
+				UErrorCode	status = U_ZERO_ERROR;
+
+				uloc_setDefault(setlocale(LC_COLLATE, NULL), &status);
+				if (U_FAILURE(status))
+				{
+					ereport(WARNING,
+							(errcode(ERRCODE_WARNING),
+							 errmsg("ICU Error: varlena.c, could not set default lc_collate")));
+					/* ucol_open() below is a no-op unless status is cleared */
+					status = U_ZERO_ERROR;
+				}
+				default_collator = ucol_open(NULL, &status);
+				if (U_FAILURE(status))
+					ereport(WARNING,
+							(errcode(ERRCODE_WARNING),
+							 errmsg("ICU Error: varlena.c, could not open collator")));
+			}
+			icu_collator = default_collator;
+#endif
 		}
 	}

@@ -1879,6 +1993,9 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
 #ifdef HAVE_LOCALE_T
 		sss->locale = locale;
 #endif
+#ifdef USE_ICU
+		sss->icu_collator = icu_collator;
+#endif

 		/*
 		 * To avoid somehow confusing a strxfrm() blob and an original string,
@@ -2089,6 +2206,23 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 		goto done;
 	}

+#ifdef USE_ICU
+	if (GetDatabaseEncoding() == PG_UTF8 && sss->icu_collator)
+	{
+		UErrorCode	status = U_ZERO_ERROR;
+		UCharIterator sIter, tIter;
+
+		/* Collate the UTF-8 bytes in place; a UErrorCode is no SQLSTATE. */
+		uiter_setUTF8(&sIter, a1p, len1);
+		uiter_setUTF8(&tIter, a2p, len2);
+		result = ucol_strcollIter(sss->icu_collator, &sIter, &tIter, &status);
+		if (U_FAILURE(status))
+			ereport(WARNING,
+					(errcode(ERRCODE_WARNING),
+					 errmsg("ICU Error: varlena.c, could not collate")));
+	}
+	else
+#endif
 #ifdef HAVE_LOCALE_T
 	if (sss->locale)
 		result = strcoll_l(sss->buf1, sss->buf2, sss->locale);
diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c
index 11099b8..d411f45 100644
--- src/backend/utils/mb/encnames.c
+++ src/backend/utils/mb/encnames.c
@@ -403,6 +403,65 @@ const pg_enc2gettext pg_enc2gettext_tbl[] =
 };


+#ifdef USE_ICU
+/*
+ * Map each internal character encoding to the name ICU should be given for
+ * it, preferring the IANA name.  mbutils.c indexes this table directly with
+ * the pg_enc value, so it needs one entry per encoding, in the same order as
+ * enum pg_enc in mb/pg_wchar.h.
+ *
+ * Palle Girgensohn, 2005
+ */
+
+pg_enc2name pg_enc2iananame_tbl[] =
+{
+	{"US-ASCII", PG_SQL_ASCII},
+	{"EUC-JP", PG_EUC_JP},
+	{"GB2312", PG_EUC_CN},
+	{"EUC-KR", PG_EUC_KR},
+	{"ISO-2022-CN", PG_EUC_TW},
+	{"EUC-JISX0213", PG_EUC_JIS_2004},	/* no IANA name; iconv-style name, verify with ICU */
+	{"UTF-8", PG_UTF8},
+	{"MULE_INTERNAL", PG_MULE_INTERNAL},	/* is not for real */
+	{"ISO-8859-1", PG_LATIN1},
+	{"ISO-8859-2", PG_LATIN2},
+	{"ISO-8859-3", PG_LATIN3},
+	{"ISO-8859-4", PG_LATIN4},
+	{"ISO-8859-9", PG_LATIN5},
+	{"ISO-8859-10", PG_LATIN6},
+	{"ISO-8859-13", PG_LATIN7},
+	{"ISO-8859-14", PG_LATIN8},
+	{"ISO-8859-15", PG_LATIN9},
+	{"ISO-8859-16", PG_LATIN10},
+	{"windows-1256", PG_WIN1256},
+	{"windows-1258", PG_WIN1258},
+	{"IBM866", PG_WIN866},
+	{"windows-874", PG_WIN874},
+	{"KOI8-R", PG_KOI8R},
+	{"windows-1251", PG_WIN1251},
+	{"windows-1252", PG_WIN1252},
+	{"ISO-8859-5", PG_ISO_8859_5},
+	{"ISO-8859-6", PG_ISO_8859_6},
+	{"ISO-8859-7", PG_ISO_8859_7},
+	{"ISO-8859-8", PG_ISO_8859_8},
+	{"windows-1250", PG_WIN1250},
+	{"windows-1253", PG_WIN1253},
+	{"windows-1254", PG_WIN1254},
+	{"windows-1255", PG_WIN1255},
+	{"windows-1257", PG_WIN1257},
+	{"KOI8-U", PG_KOI8U},
+	{"Shift_JIS", PG_SJIS},
+	{"Big5", PG_BIG5},
+	{"GBK", PG_GBK},
+	{"cp949", PG_UHC},
+	{"GB18030", PG_GB18030},
+	{"KS_C_5601-1987", PG_JOHAB},	/* either KS_C_5601-1987 or ISO-2022-KR ??? */
+	{"SHIFT_JISX0213", PG_SHIFT_JIS_2004}	/* no IANA name; iconv-style name, verify with ICU */
+};
+#endif /* USE_ICU */
+
+
+
 /* ----------
  * Encoding checks, for error returns -1 else encoding id
  * ----------
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 7f1c881..7b7bc01 100644
--- src/backend/utils/mb/mbutils.c
+++ src/backend/utils/mb/mbutils.c
@@ -40,6 +40,10 @@
 #include "utils/builtins.h"
 #include "utils/memutils.h"
 #include "utils/syscache.h"
+#ifdef USE_ICU
+#define U_CHARSET_IS_UTF8 1
+#include <unicode/ucnv.h>
+#endif /* USE_ICU */

 /*
  * When converting strings between different encodings, we assume that space
@@ -913,6 +917,9 @@ SetDatabaseEncoding(int encoding)

 	DatabaseEncoding = &pg_enc2name_tbl[encoding];
 	Assert(DatabaseEncoding->encoding == encoding);
+#ifdef USE_ICU
+	ucnv_setDefaultName((&pg_enc2iananame_tbl[encoding])->name);
+#endif
 }

 void
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 24e8d0d..11da225 100644
--- src/include/mb/pg_wchar.h
+++ src/include/mb/pg_wchar.h
@@ -321,6 +321,10 @@ typedef struct pg_enc2name

 extern const pg_enc2name pg_enc2name_tbl[];

+#ifdef USE_ICU
+extern pg_enc2name pg_enc2iananame_tbl[];
+#endif
+
 /*
  * Encoding names for gettext
  */
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index b621ff2..de58917 100644
--- src/include/pg_config.h.in
+++ src/include/pg_config.h.in
@@ -288,6 +288,12 @@
 /* Define to 1 if you have the `crypto' library (-lcrypto). */
 #undef HAVE_LIBCRYPTO

+/* Define to 1 if you have the `icui18n' library (-licui18n). */
+#undef HAVE_LIBICUI18N
+
+/* Define to 1 if you have the `icuuc' library (-licuuc). */
+#undef HAVE_LIBICUUC
+
 /* Define to 1 if you have the `ldap' library (-lldap). */
 #undef HAVE_LIBLDAP

@@ -796,6 +802,9 @@
 /* Define to 1 to build with BSD Authentication support. (--with-bsd-auth) */
 #undef USE_BSD_AUTH

+/* Define to build with ICU support. (--with-icu) */
+#undef USE_ICU
+
 /* Define to 1 if you want float4 values to be passed by value.
    (--enable-float4-byval) */
 #undef USE_FLOAT4_BYVAL
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 0a4b9f7..d750536 100644
--- src/include/utils/pg_locale.h
+++ src/include/utils/pg_locale.h
@@ -19,6 +19,12 @@

 #include "utils/guc.h"

+#ifdef USE_ICU
+#define U_CHARSET_IS_UTF8 1
+#include <unicode/uchar.h>
+#include <unicode/ucasemap.h>
+#include <unicode/ucol.h>
+#endif

 /* GUC settings */
 extern char *locale_messages;
@@ -71,6 +77,10 @@ typedef locale_t pg_locale_t;
 typedef int pg_locale_t;
 #endif

+#ifdef USE_ICU
+extern UCollator * pg_icu_collator_from_collation(Oid collid);
+extern UCaseMap  * pg_icu_casemap_from_collation(Oid collid);
+#endif
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);

 /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */