diff --git a/aclocal.m4 b/aclocal.m4 index 6f930b6..5ca902b 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -7,6 +7,7 @@ m4_include([config/docbook.m4]) m4_include([config/general.m4]) m4_include([config/libtool.m4]) m4_include([config/perl.m4]) +m4_include([config/pkg.m4]) m4_include([config/programs.m4]) m4_include([config/python.m4]) m4_include([config/tcl.m4]) diff --git a/config/pkg.m4 b/config/pkg.m4 new file mode 100644 index 0000000..82bea96 --- /dev/null +++ b/config/pkg.m4 @@ -0,0 +1,275 @@ +dnl pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- +dnl serial 11 (pkg-config-0.29.1) +dnl +dnl Copyright © 2004 Scott James Remnant . +dnl Copyright © 2012-2015 Dan Nicholson +dnl +dnl This program is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 2 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +dnl 02111-1307, USA. +dnl +dnl As a special exception to the GNU General Public License, if you +dnl distribute this file as part of a program that contains a +dnl configuration script generated by Autoconf, you may include it under +dnl the same distribution terms that you use for the rest of that +dnl program. + +dnl PKG_PREREQ(MIN-VERSION) +dnl ----------------------- +dnl Since: 0.29 +dnl +dnl Verify that the version of the pkg-config macros are at least +dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's +dnl installed version of pkg-config, this checks the developer's version +dnl of pkg.m4 when generating configure. +dnl +dnl To ensure that this macro is defined, also add: +dnl m4_ifndef([PKG_PREREQ], +dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) +dnl +dnl See the "Since" comment for each macro you use to see what version +dnl of the macros you require. +m4_defun([PKG_PREREQ], +[m4_define([PKG_MACROS_VERSION], [0.29.1]) +m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, + [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) +])dnl PKG_PREREQ + +dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) +dnl ---------------------------------- +dnl Since: 0.16 +dnl +dnl Search for the pkg-config tool and set the PKG_CONFIG variable to +dnl first found in the path. Checks that the version of pkg-config found +dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is +dnl used since that's the first version where most current features of +dnl pkg-config existed. +AC_DEFUN([PKG_PROG_PKG_CONFIG], +[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) +m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) +m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) +AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) +AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) +AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=m4_default([$1], [0.9.0]) + AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + PKG_CONFIG="" + fi +fi[]dnl +])dnl PKG_PROG_PKG_CONFIG + +dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------------------------------- +dnl Since: 0.18 +dnl +dnl Check to see whether a particular set of modules exists. Similar to +dnl PKG_CHECK_MODULES(), but does not set variables or print errors. +dnl +dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +dnl only at the first occurence in configure.ac, so if the first place +dnl it's called might be skipped (such as if it is within an "if", you +dnl have to call PKG_CHECK_EXISTS manually +AC_DEFUN([PKG_CHECK_EXISTS], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +if test -n "$PKG_CONFIG" && \ + AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then + m4_default([$2], [:]) +m4_ifvaln([$3], [else + $3])dnl +fi]) + +dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) +dnl --------------------------------------------- +dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting +dnl pkg_failed based on the result. +m4_define([_PKG_CONFIG], +[if test -n "$$1"; then + pkg_cv_[]$1="$$1" + elif test -n "$PKG_CONFIG"; then + PKG_CHECK_EXISTS([$3], + [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes ], + [pkg_failed=yes]) + else + pkg_failed=untried +fi[]dnl +])dnl _PKG_CONFIG + +dnl _PKG_SHORT_ERRORS_SUPPORTED +dnl --------------------------- +dnl Internal check to see if pkg-config supports short errors. +AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi[]dnl +])dnl _PKG_SHORT_ERRORS_SUPPORTED + + +dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl -------------------------------------------------------------- +dnl Since: 0.4.0 +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES might not happen, you should be sure to include an +dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac +AC_DEFUN([PKG_CHECK_MODULES], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl +AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl + +pkg_failed=no +AC_MSG_CHECKING([for $1]) + +_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) +_PKG_CONFIG([$1][_LIBS], [libs], [$2]) + +m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS +and $1[]_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details.]) + +if test $pkg_failed = yes; then + AC_MSG_RESULT([no]) + _PKG_SHORT_ERRORS_SUPPORTED + if test $_pkg_short_errors_supported = yes; then + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` + else + $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD + + m4_default([$4], [AC_MSG_ERROR( +[Package requirements ($2) were not met: + +$$1_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +_PKG_TEXT])[]dnl + ]) +elif test $pkg_failed = untried; then + AC_MSG_RESULT([no]) + m4_default([$4], [AC_MSG_FAILURE( +[The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +_PKG_TEXT + +To get pkg-config, see .])[]dnl + ]) +else + $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS + $1[]_LIBS=$pkg_cv_[]$1[]_LIBS + AC_MSG_RESULT([yes]) + $3 +fi[]dnl +])dnl PKG_CHECK_MODULES + + +dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl --------------------------------------------------------------------- +dnl Since: 0.29 +dnl +dnl Checks for existence of MODULES and gathers its build flags with +dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags +dnl and VARIABLE-PREFIX_LIBS from --libs. +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to +dnl include an explicit call to PKG_PROG_PKG_CONFIG in your +dnl configure.ac. +AC_DEFUN([PKG_CHECK_MODULES_STATIC], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +_save_PKG_CONFIG=$PKG_CONFIG +PKG_CONFIG="$PKG_CONFIG --static" +PKG_CHECK_MODULES($@) +PKG_CONFIG=$_save_PKG_CONFIG[]dnl +])dnl PKG_CHECK_MODULES_STATIC + + +dnl PKG_INSTALLDIR([DIRECTORY]) +dnl ------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable pkgconfigdir as the location where a module +dnl should install pkg-config .pc files. By default the directory is +dnl $libdir/pkgconfig, but the default can be changed by passing +dnl DIRECTORY. The user can override through the --with-pkgconfigdir +dnl parameter. +AC_DEFUN([PKG_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([pkgconfigdir], + [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, + [with_pkgconfigdir=]pkg_default) +AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_INSTALLDIR + + +dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) +dnl -------------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable noarch_pkgconfigdir as the location where a +dnl module should install arch-independent pkg-config .pc files. By +dnl default the directory is $datadir/pkgconfig, but the default can be +dnl changed by passing DIRECTORY. The user can override through the +dnl --with-noarch-pkgconfigdir parameter. +AC_DEFUN([PKG_NOARCH_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([noarch-pkgconfigdir], + [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, + [with_noarch_pkgconfigdir=]pkg_default) +AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_NOARCH_INSTALLDIR + + +dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, +dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------- +dnl Since: 0.28 +dnl +dnl Retrieves the value of the pkg-config variable for the given module. +AC_DEFUN([PKG_CHECK_VAR], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl + +_PKG_CONFIG([$1], [variable="][$3]["], [$2]) +AS_VAR_COPY([$1], [pkg_cv_][$1]) + +AS_VAR_IF([$1], [""], [$5], [$4])dnl +])dnl PKG_CHECK_VAR diff --git a/configure b/configure index 45c8eef..9690dc6 100755 --- a/configure +++ b/configure @@ -716,6 +716,12 @@ krb_srvtab with_python with_perl with_tcl +ICU_LIBS +ICU_CFLAGS +PKG_CONFIG_LIBDIR +PKG_CONFIG_PATH +PKG_CONFIG +with_icu enable_thread_safety INCLUDES autodepend @@ -820,6 +826,7 @@ with_CC enable_depend enable_cassert enable_thread_safety +with_icu with_tcl with_tclconfig with_perl @@ -855,6 +862,11 @@ LDFLAGS LIBS CPPFLAGS CPP +PKG_CONFIG +PKG_CONFIG_PATH +PKG_CONFIG_LIBDIR +ICU_CFLAGS +ICU_LIBS LDFLAGS_EX LDFLAGS_SL DOCBOOKSTYLE' @@ -1509,6 +1521,7 @@ Optional Packages: --with-wal-segsize=SEGSIZE set WAL segment size in MB [16] --with-CC=CMD set compiler (deprecated) + --with-icu build with ICU support --with-tcl build Tcl modules (PL/Tcl) --with-tclconfig=DIR tclConfig.sh is in DIR --with-perl build Perl modules (PL/Perl) @@ -1544,6 +1557,13 @@ Some influential environment variables: CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory CPP C preprocessor + PKG_CONFIG path to pkg-config utility + PKG_CONFIG_PATH + directories to add to pkg-config's search path + PKG_CONFIG_LIBDIR + path overriding pkg-config's built-in search path + ICU_CFLAGS C compiler flags for ICU, overriding pkg-config + ICU_LIBS linker flags for ICU, overriding pkg-config LDFLAGS_EX extra linker flags for linking executables only LDFLAGS_SL extra linker flags for linking shared libraries only DOCBOOKSTYLE @@ -5340,6 +5360,255 @@ $as_echo "$enable_thread_safety" >&6; } # +# ICU +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with ICU support" >&5 +$as_echo_n "checking whether to build with ICU support... " >&6; } + + + +# Check whether --with-icu was given. +if test "${with_icu+set}" = set; then : + withval=$with_icu; + case $withval in + yes) + +$as_echo "#define USE_ICU 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-icu option" "$LINENO" 5 + ;; + esac + +else + with_icu=no + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_icu" >&5 +$as_echo "$with_icu" >&6; } + + +if test "$with_icu" = yes; then + + + + + + + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. +set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_PKG_CONFIG+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +PKG_CONFIG=$ac_cv_path_PKG_CONFIG +if test -n "$PKG_CONFIG"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 +$as_echo "$PKG_CONFIG" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_PKG_CONFIG"; then + ac_pt_PKG_CONFIG=$PKG_CONFIG + # Extract the first word of "pkg-config", so it can be a program name with args. +set dummy pkg-config; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_PKG_CONFIG+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG +if test -n "$ac_pt_PKG_CONFIG"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 +$as_echo "$ac_pt_PKG_CONFIG" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_PKG_CONFIG" = x; then + PKG_CONFIG="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + PKG_CONFIG=$ac_pt_PKG_CONFIG + fi +else + PKG_CONFIG="$ac_cv_path_PKG_CONFIG" +fi + +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=0.9.0 + { $as_echo "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5 +$as_echo_n "checking pkg-config is at least version $_pkg_min_version... " >&6; } + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + PKG_CONFIG="" + fi +fi + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ICU" >&5 +$as_echo_n "checking for ICU... " >&6; } + +if test -n "$ICU_CFLAGS"; then + pkg_cv_ICU_CFLAGS="$ICU_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"icu-uc icu-i18n\""; } >&5 + ($PKG_CONFIG --exists --print-errors "icu-uc icu-i18n") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_ICU_CFLAGS=`$PKG_CONFIG --cflags "icu-uc icu-i18n" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$ICU_LIBS"; then + pkg_cv_ICU_LIBS="$ICU_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"icu-uc icu-i18n\""; } >&5 + ($PKG_CONFIG --exists --print-errors "icu-uc icu-i18n") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_ICU_LIBS=`$PKG_CONFIG --libs "icu-uc icu-i18n" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + ICU_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "icu-uc icu-i18n" 2>&1` + else + ICU_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "icu-uc icu-i18n" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$ICU_PKG_ERRORS" >&5 + + as_fn_error $? "Package requirements (icu-uc icu-i18n) were not met: + +$ICU_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +Alternatively, you may set the environment variables ICU_CFLAGS +and ICU_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details." "$LINENO" 5 +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +Alternatively, you may set the environment variables ICU_CFLAGS +and ICU_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details. + +To get pkg-config, see . +See \`config.log' for more details" "$LINENO" 5; } +else + ICU_CFLAGS=$pkg_cv_ICU_CFLAGS + ICU_LIBS=$pkg_cv_ICU_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +fi +fi + +# # Optionally build Tcl modules (PL/Tcl) # { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with Tcl" >&5 diff --git a/configure.in b/configure.in index c878b4e..4ef9b4c 100644 --- a/configure.in +++ b/configure.in @@ -609,6 +609,19 @@ AC_MSG_RESULT([$enable_thread_safety]) AC_SUBST(enable_thread_safety) # +# ICU +# +AC_MSG_CHECKING([whether to build with ICU support]) +PGAC_ARG_BOOL(with, icu, no, [build with ICU support], + [AC_DEFINE([USE_ICU], 1, [Define to build with ICU support. (--with-icu)])]) +AC_MSG_RESULT([$with_icu]) +AC_SUBST(with_icu) + +if test "$with_icu" = yes; then + PKG_CHECK_MODULES(ICU, icu-uc icu-i18n) +fi + +# # Optionally build Tcl modules (PL/Tcl) # AC_MSG_CHECKING([whether to build with Tcl]) diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index 14a6d57..8e62b11 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -771,6 +771,15 @@ Configuration + + + + Build with support for the ICU library. + + + + + OpenSSL diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index d757cdf..f37ffba 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -21,7 +21,8 @@ CREATE COLLATION name ( [ LOCALE = locale, ] [ LC_COLLATE = lc_collate, ] - [ LC_CTYPE = lc_ctype ] + [ LC_CTYPE = lc_ctype, ] + [ PROVIDER = provider ] ) CREATE COLLATION name FROM existing_collation @@ -103,6 +104,19 @@ Parameters + provider + + + + Specifies the provider to use for locale services associated with this + collation. Possible values + are: icu, posix. The available + choices depend on the operating system and build options. + + + + + existing_collation diff --git a/src/Makefile.global.in b/src/Makefile.global.in index c211a2d..ad50938 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -179,6 +179,7 @@ pgxsdir = $(pkglibdir)/pgxs # # Records the choice of the various --enable-xxx and --with-xxx options. +with_icu = @with_icu@ with_perl = @with_perl@ with_python = @with_python@ with_tcl = @with_tcl@ @@ -207,6 +208,9 @@ python_version = @python_version@ krb_srvtab = @krb_srvtab@ +ICU_CFLAGS = @ICU_CFLAGS@ +ICU_LIBS = @ICU_LIBS@ + TCLSH = @TCLSH@ TCL_LIBS = @TCL_LIBS@ TCL_LIB_SPEC = @TCL_LIB_SPEC@ diff --git a/src/backend/Makefile b/src/backend/Makefile index 3b08def..943b36d 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -58,7 +58,7 @@ ifneq ($(PORTNAME), win32) ifneq ($(PORTNAME), aix) postgres: $(OBJS) - $(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) $(ICU_LIBS) -o $@ endif endif diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index f37cf37..7d4a8b5 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -40,8 +40,10 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, + char collprovider, int32 collencoding, - const char *collcollate, const char *collctype) + const char *collcollate, const char *collctype, + int32 collversion) { Relation rel; TupleDesc tupDesc; @@ -75,12 +77,12 @@ CollationCreate(const char *collname, Oid collnamespace, ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("collation \"%s\" for encoding \"%s\" already exists", - collname, pg_encoding_to_char(collencoding)))); + collname, pg_encoding_to_char(collencoding)))); // XXX /* * Also forbid matching an any-encoding entry. This test of course is not * backed up by the unique index, but it's not a problem since we don't - * support adding any-encoding entries after initdb. + * support adding any-encoding entries after initdb. XXX */ if (SearchSysCacheExists3(COLLNAMEENCNSP, PointerGetDatum(collname), @@ -102,11 +104,13 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_collname - 1] = NameGetDatum(&name_name); values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace); values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner); + values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider); values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding); namestrcpy(&name_collate, collcollate); values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate); namestrcpy(&name_ctype, collctype); values[Anum_pg_collation_collctype - 1] = NameGetDatum(&name_ctype); + values[Anum_pg_collation_collversion - 1] = Int32GetDatum(collversion); tup = heap_form_tuple(tupDesc, values, nulls); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index e4ebb65..7bd9479 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -48,8 +48,12 @@ DefineCollation(List *names, List *parameters) DefElem *localeEl = NULL; DefElem *lccollateEl = NULL; DefElem *lcctypeEl = NULL; + DefElem *providerEl = NULL; char *collcollate = NULL; char *collctype = NULL; + char *collproviderstr = NULL; + char collprovider; + int32 collversion; Oid newoid; ObjectAddress address; @@ -73,6 +77,8 @@ DefineCollation(List *names, List *parameters) defelp = &lccollateEl; else if (pg_strcasecmp(defel->defname, "lc_ctype") == 0) defelp = &lcctypeEl; + else if (pg_strcasecmp(defel->defname, "provider") == 0) + defelp = &providerEl; else { ereport(ERROR, @@ -103,6 +109,7 @@ DefineCollation(List *names, List *parameters) collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype)); + collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; ReleaseSysCache(tp); } @@ -119,6 +126,26 @@ DefineCollation(List *names, List *parameters) if (lcctypeEl) collctype = defGetString(lcctypeEl); + if (providerEl) + collproviderstr = defGetString(providerEl); + + if (collproviderstr) + { + if (pg_strcasecmp(collproviderstr, "posix") == 0) + collprovider = COLLPROVIDER_POSIX; +#ifdef USE_ICU + if (pg_strcasecmp(collproviderstr, "icu") == 0) + collprovider = COLLPROVIDER_ICU; +#endif + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("unrecognized collation provider: %s", + collproviderstr))); + } + else if (!fromEl) + collprovider = COLLPROVIDER_POSIX; + if (!collcollate) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -131,12 +158,37 @@ DefineCollation(List *names, List *parameters) check_encoding_locale_matches(GetDatabaseEncoding(), collcollate, collctype); +#ifdef USE_ICU + if (collprovider == COLLPROVIDER_ICU) + { + UCollator *collator; + UErrorCode status; + UVersionInfo versioninfo; + + status = U_ZERO_ERROR; + collator = ucol_open(collcollate, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + collcollate, u_errorName(status)))); + + ucol_getVersion(collator, versioninfo); + ucol_close(collator); + + collversion = ntohl(*((uint32 *) versioninfo)); + } + else +#endif + collversion = 0; + newoid = CollationCreate(collName, collNamespace, GetUserId(), + collprovider, GetDatabaseEncoding(), collcollate, - collctype); + collctype, + collversion); ObjectAddressSet(address, CollationRelationId, newoid); diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 551ae7d..7adf017 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -303,13 +303,13 @@ pg_wc_isdigit(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit_l((wint_t) c, pg_regex_locale); + return iswdigit_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isdigit_l((unsigned char) c, pg_regex_locale)); + isdigit_l((unsigned char) c, pg_regex_locale->lt)); #endif break; } @@ -336,13 +336,13 @@ pg_wc_isalpha(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha_l((wint_t) c, pg_regex_locale); + return iswalpha_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isalpha_l((unsigned char) c, pg_regex_locale)); + isalpha_l((unsigned char) c, pg_regex_locale->lt)); #endif break; } @@ -369,13 +369,13 @@ pg_wc_isalnum(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum_l((wint_t) c, pg_regex_locale); + return iswalnum_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isalnum_l((unsigned char) c, pg_regex_locale)); + isalnum_l((unsigned char) c, pg_regex_locale->lt)); #endif break; } @@ -402,13 +402,13 @@ pg_wc_isupper(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper_l((wint_t) c, pg_regex_locale); + return iswupper_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isupper_l((unsigned char) c, pg_regex_locale)); + isupper_l((unsigned char) c, pg_regex_locale->lt)); #endif break; } @@ -435,13 +435,13 @@ pg_wc_islower(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower_l((wint_t) c, pg_regex_locale); + return iswlower_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - islower_l((unsigned char) c, pg_regex_locale)); + islower_l((unsigned char) c, pg_regex_locale->lt)); #endif break; } @@ -468,13 +468,13 @@ pg_wc_isgraph(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph_l((wint_t) c, pg_regex_locale); + return iswgraph_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isgraph_l((unsigned char) c, pg_regex_locale)); + isgraph_l((unsigned char) c, pg_regex_locale->lt)); #endif break; } @@ -501,13 +501,13 @@ pg_wc_isprint(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint_l((wint_t) c, pg_regex_locale); + return iswprint_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isprint_l((unsigned char) c, pg_regex_locale)); + isprint_l((unsigned char) c, pg_regex_locale->lt)); #endif break; } @@ -534,13 +534,13 @@ pg_wc_ispunct(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct_l((wint_t) c, pg_regex_locale); + return iswpunct_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - ispunct_l((unsigned char) c, pg_regex_locale)); + ispunct_l((unsigned char) c, pg_regex_locale->lt)); #endif break; } @@ -567,13 +567,13 @@ pg_wc_isspace(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace_l((wint_t) c, pg_regex_locale); + return iswspace_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isspace_l((unsigned char) c, pg_regex_locale)); + isspace_l((unsigned char) c, pg_regex_locale->lt)); #endif break; } @@ -608,13 +608,13 @@ pg_wc_toupper(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper_l((wint_t) c, pg_regex_locale); + return towupper_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T if (c <= (pg_wchar) UCHAR_MAX) - return toupper_l((unsigned char) c, pg_regex_locale); + return toupper_l((unsigned char) c, pg_regex_locale->lt); #endif return c; } @@ -649,13 +649,13 @@ pg_wc_tolower(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower_l((wint_t) c, pg_regex_locale); + return towlower_l((wint_t) c, pg_regex_locale->lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T if (c <= (pg_wchar) UCHAR_MAX) - return tolower_l((unsigned char) c, pg_regex_locale); + return tolower_l((unsigned char) c, pg_regex_locale->lt); #endif return c; } diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index bbd97dc..be3968b 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -82,6 +82,10 @@ #include #endif +#ifdef USE_ICU +#include +#endif + #include "catalog/pg_collation.h" #include "mb/pg_wchar.h" #include "utils/builtins.h" @@ -1456,6 +1460,42 @@ str_numth(char *dest, char *num, int type) * upper/lower/initcap functions *****************************************************************************/ +#ifdef USE_ICU +static int32_t +icu_convert_case(int32_t (*func)(UChar *, int32_t, const UChar *, int32_t, const char *, UErrorCode *), + pg_locale_t mylocale, UChar **buff_dest, UChar *buff_source, int32_t len_source) +{ + UErrorCode status; + int32_t len_dest; + + len_dest = len_source; /* try first with same length */ + *buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->locale, &status); + if (status == U_BUFFER_OVERFLOW_ERROR) + { + /* try again with adjusted length */ + pfree(buff_dest); + buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->locale, &status); + } + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("case conversion failed: %s", u_errorName(status)))); + return len_dest; +} + +static int32_t +u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode) +{ + return u_strToTitle(dest, destCapacity, src, srcLength, NULL, locale, pErrorCode); +} +#endif + /* * If the system provides the needed functions for wide-character manipulation * (which are all standardized by C99), then we implement upper/lower/initcap @@ -1492,12 +1532,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) result = asc_tolower(buff, nbytes); } #ifdef USE_WIDE_UPPER_LOWER - else if (pg_database_encoding_max_length() > 1) + else { pg_locale_t mylocale = 0; - wchar_t *workspace; - size_t curr_char; - size_t result_size; if (collid != DEFAULT_COLLATION_OID) { @@ -1515,77 +1552,79 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); } - /* Overflow paranoia */ - if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar; + int32_t len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(mylocale, &buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToLower, mylocale, &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(mylocale, &result, buff_conv, len_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; + + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - /* Output workspace cannot have more codes than input bytes */ - workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); - for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T - if (mylocale) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale); - else + if (mylocale) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->lt); + else #endif - workspace[curr_char] = towlower(workspace[curr_char]); - } + workspace[curr_char] = towlower(workspace[curr_char]); + } - /* Make result large enough; case change might change number of bytes */ - result_size = curr_char * pg_database_encoding_max_length() + 1; - result = palloc(result_size); + /* Make result large enough; case change might change number of bytes */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); - wchar2char(result, workspace, result_size, mylocale); - pfree(workspace); - } + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); + } #endif /* USE_WIDE_UPPER_LOWER */ - else - { -#ifdef HAVE_LOCALE_T - pg_locale_t mylocale = 0; -#endif - char *p; - - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) + else { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for lower() function"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } -#ifdef HAVE_LOCALE_T - mylocale = pg_newlocale_from_collation(collid); -#endif - } + char *p; - result = pnstrdup(buff, nbytes); + result = pnstrdup(buff, nbytes); - /* - * Note: we assume that tolower_l() will not be so broken as to need - * an isupper_l() guard test. When using the default collation, we - * apply the traditional Postgres behavior that forces ASCII-style - * treatment of I/i, but in non-default collations you get exactly - * what the collation says. - */ - for (p = result; *p; p++) - { + /* + * Note: we assume that tolower_l() will not be so broken as to need + * an isupper_l() guard test. When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. + */ + for (p = result; *p; p++) + { #ifdef HAVE_LOCALE_T - if (mylocale) - *p = tolower_l((unsigned char) *p, mylocale); - else + if (mylocale) + *p = tolower_l((unsigned char) *p, mylocale->lt); + else #endif - *p = pg_tolower((unsigned char) *p); + *p = pg_tolower((unsigned char) *p); + } + } } } @@ -1612,12 +1651,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) result = asc_toupper(buff, nbytes); } #ifdef USE_WIDE_UPPER_LOWER - else if (pg_database_encoding_max_length() > 1) + else { pg_locale_t mylocale = 0; - wchar_t *workspace; - size_t curr_char; - size_t result_size; if (collid != DEFAULT_COLLATION_OID) { @@ -1635,77 +1671,78 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); } - /* Overflow paranoia */ - if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar, len_conv; + UChar *buff_uchar; + UChar *buff_conv; - /* Output workspace cannot have more codes than input bytes */ - workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + len_uchar = icu_to_uchar(mylocale, &buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToUpper, mylocale, &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(mylocale, &result, buff_conv, len_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; - char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { -#ifdef HAVE_LOCALE_T - if (mylocale) - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale); - else -#endif - workspace[curr_char] = towupper(workspace[curr_char]); - } + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - /* Make result large enough; case change might change number of bytes */ - result_size = curr_char * pg_database_encoding_max_length() + 1; - result = palloc(result_size); + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); - wchar2char(result, workspace, result_size, mylocale); - pfree(workspace); - } -#endif /* USE_WIDE_UPPER_LOWER */ - else - { + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T - pg_locale_t mylocale = 0; + if (mylocale) + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->lt); + else #endif - char *p; + workspace[curr_char] = towupper(workspace[curr_char]); + } - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for upper() function"), - errhint("Use the COLLATE clause to set the collation explicitly."))); + /* Make result large enough; case change might change number of bytes */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); + + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); } -#ifdef HAVE_LOCALE_T - mylocale = pg_newlocale_from_collation(collid); -#endif - } +#endif /* USE_WIDE_UPPER_LOWER */ + else + { + char *p; - result = pnstrdup(buff, nbytes); + result = pnstrdup(buff, nbytes); - /* - * Note: we assume that toupper_l() will not be so broken as to need - * an islower_l() guard test. When using the default collation, we - * apply the traditional Postgres behavior that forces ASCII-style - * treatment of I/i, but in non-default collations you get exactly - * what the collation says. - */ - for (p = result; *p; p++) - { + /* + * Note: we assume that toupper_l() will not be so broken as to need + * an islower_l() guard test. When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. + */ + for (p = result; *p; p++) + { #ifdef HAVE_LOCALE_T - if (mylocale) - *p = toupper_l((unsigned char) *p, mylocale); - else + if (mylocale) + *p = toupper_l((unsigned char) *p, mylocale->lt); + else #endif - *p = pg_toupper((unsigned char) *p); + *p = pg_toupper((unsigned char) *p); + } + } } } @@ -1733,12 +1770,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) result = asc_initcap(buff, nbytes); } #ifdef USE_WIDE_UPPER_LOWER - else if (pg_database_encoding_max_length() > 1) + else { pg_locale_t mylocale = 0; - wchar_t *workspace; - size_t curr_char; - size_t result_size; if (collid != DEFAULT_COLLATION_OID) { @@ -1756,100 +1790,101 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); } - /* Overflow paranoia */ - if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* Output workspace cannot have more codes than input bytes */ - workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - - char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); - - for (curr_char = 0; workspace[curr_char] != 0; curr_char++) +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) { -#ifdef HAVE_LOCALE_T - if (mylocale) - { - if (wasalnum) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale); - else - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale); - wasalnum = iswalnum_l(workspace[curr_char], mylocale); - } - else + int32_t len_uchar, len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(mylocale, &buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(mylocale, &result, buff_conv, len_conv); + } + else #endif + { + if (pg_database_encoding_max_length() > 1) { - if (wasalnum) - workspace[curr_char] = towlower(workspace[curr_char]); - else - workspace[curr_char] = towupper(workspace[curr_char]); - wasalnum = iswalnum(workspace[curr_char]); - } - } + wchar_t *workspace; + size_t curr_char; + size_t result_size; - /* Make result large enough; case change might change number of bytes */ - result_size = curr_char * pg_database_encoding_max_length() + 1; - result = palloc(result_size); + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - wchar2char(result, workspace, result_size, mylocale); - pfree(workspace); - } -#endif /* USE_WIDE_UPPER_LOWER */ - else - { -#ifdef HAVE_LOCALE_T - pg_locale_t mylocale = 0; -#endif - char *p; + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for initcap() function"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T - mylocale = pg_newlocale_from_collation(collid); + if (mylocale) + { + if (wasalnum) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->lt); + else + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->lt); + } + else #endif - } + { + if (wasalnum) + workspace[curr_char] = towlower(workspace[curr_char]); + else + workspace[curr_char] = towupper(workspace[curr_char]); + wasalnum = iswalnum(workspace[curr_char]); + } + } - result = pnstrdup(buff, nbytes); + /* Make result large enough; case change might change number of bytes */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); - /* - * Note: we assume that toupper_l()/tolower_l() will not be so broken - * as to need guard tests. When using the default collation, we apply - * the traditional Postgres behavior that forces ASCII-style treatment - * of I/i, but in non-default collations you get exactly what the - * collation says. - */ - for (p = result; *p; p++) - { -#ifdef HAVE_LOCALE_T - if (mylocale) - { - if (wasalnum) - *p = tolower_l((unsigned char) *p, mylocale); - else - *p = toupper_l((unsigned char) *p, mylocale); - wasalnum = isalnum_l((unsigned char) *p, mylocale); + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); } +#endif /* USE_WIDE_UPPER_LOWER */ else -#endif { - if (wasalnum) - *p = pg_tolower((unsigned char) *p); - else - *p = pg_toupper((unsigned char) *p); - wasalnum = isalnum((unsigned char) *p); + char *p; + + result = pnstrdup(buff, nbytes); + + /* + * Note: we assume that toupper_l()/tolower_l() will not be so broken + * as to need guard tests. When using the default collation, we apply + * the traditional Postgres behavior that forces ASCII-style treatment + * of I/i, but in non-default collations you get exactly what the + * collation says. + */ + for (p = result; *p; p++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + { + if (wasalnum) + *p = tolower_l((unsigned char) *p, mylocale->lt); + else + *p = toupper_l((unsigned char) *p, mylocale->lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->lt); + } + else +#endif + { + if (wasalnum) + *p = pg_tolower((unsigned char) *p); + else + *p = pg_toupper((unsigned char) *p); + wasalnum = isalnum((unsigned char) *p); + } + } } } } diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 08b86f5..272a721 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -96,7 +96,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) return pg_ascii_tolower(c); #ifdef HAVE_LOCALE_T else if (locale) - return tolower_l(c, locale); + return tolower_l(c, locale->lt); #endif else return pg_tolower(c); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index a818023..5d0e39d 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1213,7 +1213,60 @@ pg_newlocale_from_collation(Oid collid) #endif } - cache_entry->locale = result; + cache_entry->locale = malloc(sizeof(* cache_entry->locale)); + cache_entry->locale->provider = collform->collprovider; + cache_entry->locale->lt = result; + + if (collform->collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + const char *collcollate; + UCollator *collator; + const char *encstring; + UConverter *converter; + UErrorCode status; + UVersionInfo versioninfo; + int32 numversion; + + collcollate = NameStr(collform->collcollate); + + status = U_ZERO_ERROR; + collator = ucol_open(collcollate, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + collcollate, u_errorName(status)))); + + encstring = (&pg_enc2name_tbl[GetDatabaseEncoding()])->name; + + status = U_ZERO_ERROR; + converter = ucnv_open(encstring, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open converter for encoding \"%s\": %s", + encstring, u_errorName(status)))); + + ucol_getVersion(collator, versioninfo); + numversion = ntohl(*((uint32 *) versioninfo)); + + if (numversion != collform->collversion) + ereport(WARNING, + (errmsg("ICU collator version mismatch"), + errdetail("The database was created using version 0x%08X, the library provides version 0x%08X.", + (uint32) collform->collversion, (uint32) numversion), + errhint("Rebuild affected indexes, or build PostgreSQL with the right version of ICU."))); + + cache_entry->locale->ucol = collator; + cache_entry->locale->locale = strdup(collcollate); + cache_entry->locale->ucnv = converter; +#else /* not USE_ICU */ + /* could get here if a collation was created by a build with ICU */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"), \ + errhint("You need to rebuild PostgreSQL using --with-icu."))); +#endif /* not USE_ICU */ + } ReleaseSysCache(tp); #else /* not HAVE_LOCALE_T */ @@ -1232,6 +1285,40 @@ pg_newlocale_from_collation(Oid collid) } +#ifdef USE_ICU +int32_t +icu_to_uchar(pg_locale_t mylocale, UChar **buff_uchar, const char *buff, size_t nbytes) +{ + UErrorCode status; + int32_t len_uchar; + + len_uchar = 2 * nbytes; /* max length per docs */ + *buff_uchar = palloc(len_uchar * sizeof(**buff_uchar)); + status = U_ZERO_ERROR; + len_uchar = ucnv_toUChars(mylocale->ucnv, *buff_uchar, len_uchar, buff, nbytes, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("ucnv_toUChars failed: %s", u_errorName(status)))); + return len_uchar; +} + +int32_t +icu_from_uchar(pg_locale_t mylocale, char **result, UChar *buff_uchar, int32_t len_uchar) +{ + UErrorCode status; + int32_t len_result; + + len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(mylocale->ucnv)); + *result = palloc(len_result + 1); + status = U_ZERO_ERROR; + ucnv_fromUChars(mylocale->ucnv, *result, len_result, buff_uchar, len_uchar, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("ucnv_fromUChars failed: %s", u_errorName(status)))); + return len_result; +} +#endif + /* * These functions convert from/to libc's wchar_t, *not* pg_wchar_t. * Therefore we keep them here rather than with the mbutils code. @@ -1287,7 +1374,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) #ifdef HAVE_LOCALE_T #ifdef HAVE_WCSTOMBS_L /* Use wcstombs_l for nondefault locales */ - result = wcstombs_l(to, from, tolen, locale); + result = wcstombs_l(to, from, tolen, locale->lt); #else /* !HAVE_WCSTOMBS_L */ /* We have to temporarily set the locale as current ... ugh */ locale_t save_locale = uselocale(locale); @@ -1362,7 +1449,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, #ifdef HAVE_LOCALE_T #ifdef HAVE_MBSTOWCS_L /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale); + result = mbstowcs_l(to, str, tolen, locale->lt); #else /* !HAVE_MBSTOWCS_L */ /* We have to temporarily set the locale as current ... ugh */ locale_t save_locale = uselocale(locale); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 56943f2..662956d 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -5274,7 +5274,7 @@ pattern_char_isalpha(char c, bool is_multibyte, return true; #ifdef HAVE_LOCALE_T else if (locale) - return isalpha_l((unsigned char) c, locale); + return isalpha_l((unsigned char) c, locale->lt); #endif else return isalpha((unsigned char) c); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index bf7c0cd..af6f762 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1402,10 +1402,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid) char a2buf[TEXTBUFLEN]; char *a1p, *a2p; - -#ifdef HAVE_LOCALE_T pg_locale_t mylocale = 0; -#endif if (collid != DEFAULT_COLLATION_OID) { @@ -1420,9 +1417,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid) errmsg("could not determine which collation to use for string comparison"), errhint("Use the COLLATE clause to set the collation explicitly."))); } -#ifdef HAVE_LOCALE_T mylocale = pg_newlocale_from_collation(collid); -#endif } /* @@ -1541,9 +1536,42 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid) memcpy(a2p, arg2, len2); a2p[len2] = '\0'; -#ifdef HAVE_LOCALE_T if (mylocale) - result = strcoll_l(a1p, a2p, mylocale); + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + if (GetDatabaseEncoding() == PG_UTF8) + { + UErrorCode status; + + status = U_ZERO_ERROR; + result = ucol_strcollUTF8(mylocale->ucol, + arg1, len1, + arg2, len2, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("collation failed: %s", u_errorName(status)))); + } + else + { + int32_t ulen1, ulen2; + UChar *uchar1, *uchar2; + + ulen1 = icu_to_uchar(mylocale, &uchar1, arg1, len1); + ulen2 = icu_to_uchar(mylocale, &uchar2, arg2, len2); + + result = ucol_strcoll(mylocale->ucol, + uchar1, ulen1, + uchar2, ulen2); + } + } + else +#endif +#ifdef HAVE_LOCALE_T + result = strcoll_l(a1p, a2p, mylocale->lt); + } else #endif result = strcoll(a1p, a2p); @@ -2089,9 +2117,42 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup) goto done; } -#ifdef HAVE_LOCALE_T if (sss->locale) - result = strcoll_l(sss->buf1, sss->buf2, sss->locale); + { +#ifdef USE_ICU + if (sss->locale->provider == COLLPROVIDER_ICU) + { + if (GetDatabaseEncoding() == PG_UTF8) + { + UErrorCode status; + + status = U_ZERO_ERROR; + result = ucol_strcollUTF8(sss->locale->ucol, + a1p, len1, + a2p, len2, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("collation failed: %s", u_errorName(status)))); + } + else + { + int32_t ulen1, ulen2; + UChar *uchar1, *uchar2; + + ulen1 = icu_to_uchar(sss->locale, &uchar1, a1p, len1); + ulen2 = icu_to_uchar(sss->locale, &uchar2, a2p, len2); + + result = ucol_strcoll(sss->locale->ucol, + uchar1, ulen1, + uchar2, ulen2); + } + } + else +#endif +#ifdef HAVE_LOCALE_T + result = strcoll_l(sss->buf1, sss->buf2, sss->locale->lt); + } else #endif result = strcoll(sss->buf1, sss->buf2); @@ -2231,7 +2292,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) #ifdef HAVE_LOCALE_T if (sss->locale) bsize = strxfrm_l(sss->buf2, sss->buf1, - sss->buflen2, sss->locale); + sss->buflen2, sss->locale->lt); else #endif bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2); diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile index 394eae0..404fd33 100644 --- a/src/bin/initdb/Makefile +++ b/src/bin/initdb/Makefile @@ -16,7 +16,7 @@ subdir = src/bin/initdb top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -override CPPFLAGS := -DFRONTEND -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS) +override CPPFLAGS := -DFRONTEND -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS) $(ICU_CFLAGS) # note: we need libpq only because fe_utils does LDFLAGS += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) @@ -31,7 +31,7 @@ OBJS= initdb.o findtimezone.o localtime.o encnames.o $(WIN32RES) all: initdb initdb: $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils - $(CC) $(CFLAGS) $(OBJS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) + $(CC) $(CFLAGS) $(OBJS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) $(ICU_LIBS) -o $@$(X) # We used to pull in all of libpq to get encnames.c, but that # exposes us to risks of version skew if we link to a shared library. diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index aad6ba5..6a3712f 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -69,6 +69,11 @@ #include "miscadmin.h" #include "fe_utils/string_utils.h" +#ifdef USE_ICU +#include +#include +#endif + /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ #if defined(HAVE_SYNC_FILE_RANGE) @@ -332,6 +337,12 @@ do { \ output_failed = true, output_errno = errno; \ } while (0) +#define PG_CMD_PRINTF4(fmt, arg1, arg2, arg3, arg4) \ +do { \ + if (fprintf(cmdfd, fmt, arg1, arg2, arg3, arg4) < 0 || fflush(cmdfd) < 0) \ + output_failed = true, output_errno = errno; \ +} while (0) + static char * escape_quotes(const char *src) { @@ -1921,12 +1932,12 @@ setup_collation(FILE *cmdfd) * Also, eliminate any aliases that conflict with pg_collation's * hard-wired entries for "C" etc. */ - PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) " + PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype, collversion) " " SELECT DISTINCT ON (collname, encoding)" " collname, " " (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, " " (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, " - " encoding, locale, locale " + " 'p', encoding, locale, locale, 0 " " FROM tmp_pg_collation" " WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)" " ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n"); @@ -1945,6 +1956,41 @@ setup_collation(FILE *cmdfd) printf(_("Use the option \"--debug\" to see details.\n")); } #endif /* not HAVE_LOCALE_T && not WIN32 */ + +#ifdef USE_ICU + { + int i; + + for (i = 0; i < uloc_countAvailable(); i++) + { + const char *name = uloc_getAvailable(i); + UCollator *collator; + UErrorCode status; + UVersionInfo versioninfo; + int32 collversion; + + status = U_ZERO_ERROR; + collator = ucol_open(name, &status); + if (U_FAILURE(status)) + { + fprintf(stderr, "%s: could not open collator for locale \"%s\": %s", + progname, name, u_errorName(status)); + exit_nicely(); + } + ucol_getVersion(collator, versioninfo); + ucol_close(collator); + + collversion = ntohl(*((uint32 *) versioninfo)); + + PG_CMD_PRINTF4("INSERT INTO pg_collation (collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype, collversion) " + "VALUES ('%s%%icu', " + "(SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog'), " + "(SELECT relowner FROM pg_class WHERE relname = 'pg_collation'), " + "'i', -1, '%s', '%s', %d);", + name, name, name, collversion); + } + } +#endif } /* diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index a5c2d09..644c5c0 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -13176,8 +13176,10 @@ dumpCollation(Archive *fout, CollInfo *collinfo) PQExpBuffer delq; PQExpBuffer labelq; PGresult *res; + int i_collprovider; int i_collcollate; int i_collctype; + const char *collprovider; const char *collcollate; const char *collctype; @@ -13194,18 +13196,30 @@ dumpCollation(Archive *fout, CollInfo *collinfo) selectSourceSchema(fout, collinfo->dobj.namespace->dobj.name); /* Get collation-specific details */ - appendPQExpBuffer(query, "SELECT " - "collcollate, " - "collctype " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); + if (fout->remoteVersion >= 100000) + appendPQExpBuffer(query, "SELECT " + "collprovider, " + "collcollate, " + "collctype " + "FROM pg_catalog.pg_collation c " + "WHERE c.oid = '%u'::pg_catalog.oid", + collinfo->dobj.catId.oid); + else + appendPQExpBuffer(query, "SELECT " + "'p'::char AS collprovider, " + "collcollate, " + "collctype " + "FROM pg_catalog.pg_collation c " + "WHERE c.oid = '%u'::pg_catalog.oid", + collinfo->dobj.catId.oid); res = ExecuteSqlQueryForSingleRow(fout, query->data); + i_collprovider = PQfnumber(res, "collprovider"); i_collcollate = PQfnumber(res, "collcollate"); i_collctype = PQfnumber(res, "collctype"); + collprovider = PQgetvalue(res, 0, i_collprovider); collcollate = PQgetvalue(res, 0, i_collcollate); collctype = PQgetvalue(res, 0, i_collctype); @@ -13217,11 +13231,32 @@ dumpCollation(Archive *fout, CollInfo *collinfo) appendPQExpBuffer(delq, ".%s;\n", fmtId(collinfo->dobj.name)); - appendPQExpBuffer(q, "CREATE COLLATION %s (lc_collate = ", + appendPQExpBuffer(q, "CREATE COLLATION %s (", fmtId(collinfo->dobj.name)); - appendStringLiteralAH(q, collcollate, fout); - appendPQExpBufferStr(q, ", lc_ctype = "); - appendStringLiteralAH(q, collctype, fout); + + appendPQExpBufferStr(q, "provider = "); + if (collprovider[0] == 'i') + appendStringLiteralAH(q, "icu", fout); + else if (collprovider[0] == 'p') + appendStringLiteralAH(q, "posix", fout); + else + exit_horribly(NULL, + "unrecognized collation provider: %s\n", + collprovider); + + if (strcmp(collcollate, collctype) == 0) + { + appendPQExpBufferStr(q, ", locale = "); + appendStringLiteralAH(q, collcollate, fout); + } + else + { + appendPQExpBufferStr(q, ", lc_collate = "); + appendStringLiteralAH(q, collcollate, fout); + appendPQExpBufferStr(q, ", lc_ctype = "); + appendStringLiteralAH(q, collctype, fout); + } + appendPQExpBufferStr(q, ");\n"); appendPQExpBuffer(labelq, "COLLATION %s", fmtId(collinfo->dobj.name)); diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index 6367712..a1c65f6 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -34,9 +34,11 @@ CATALOG(pg_collation,3456) NameData collname; /* collation name */ Oid collnamespace; /* OID of namespace containing collation */ Oid collowner; /* owner of collation */ + char collprovider; /* see constants below */ int32 collencoding; /* encoding for this collation; -1 = "all" */ NameData collcollate; /* LC_COLLATE setting */ NameData collctype; /* LC_CTYPE setting */ + int32 collversion; /* provider-dependent version of collation data */ } FormData_pg_collation; /* ---------------- @@ -50,27 +52,34 @@ typedef FormData_pg_collation *Form_pg_collation; * compiler constants for pg_collation * ---------------- */ -#define Natts_pg_collation 6 +#define Natts_pg_collation 8 #define Anum_pg_collation_collname 1 #define Anum_pg_collation_collnamespace 2 #define Anum_pg_collation_collowner 3 -#define Anum_pg_collation_collencoding 4 -#define Anum_pg_collation_collcollate 5 -#define Anum_pg_collation_collctype 6 +#define Anum_pg_collation_collprovider 4 +#define Anum_pg_collation_collencoding 5 +#define Anum_pg_collation_collcollate 6 +#define Anum_pg_collation_collctype 7 +#define Anum_pg_collation_collversion 8 /* ---------------- * initial contents of pg_collation * ---------------- */ -DATA(insert OID = 100 ( default PGNSP PGUID -1 "" "" )); +DATA(insert OID = 100 ( default PGNSP PGUID d -1 "" "" 0 )); DESCR("database's default collation"); #define DEFAULT_COLLATION_OID 100 -DATA(insert OID = 950 ( C PGNSP PGUID -1 "C" "C" )); +DATA(insert OID = 950 ( C PGNSP PGUID p -1 "C" "C" 0 )); DESCR("standard C collation"); #define C_COLLATION_OID 950 -DATA(insert OID = 951 ( POSIX PGNSP PGUID -1 "POSIX" "POSIX" )); +DATA(insert OID = 951 ( POSIX PGNSP PGUID p -1 "POSIX" "POSIX" 0 )); DESCR("standard POSIX collation"); #define POSIX_COLLATION_OID 951 + +#define COLLPROVIDER_DEFAULT 'd' +#define COLLPROVIDER_ICU 'i' +#define COLLPROVIDER_POSIX 'p' + #endif /* PG_COLLATION_H */ diff --git a/src/include/catalog/pg_collation_fn.h b/src/include/catalog/pg_collation_fn.h index 574b288..bda83b9 100644 --- a/src/include/catalog/pg_collation_fn.h +++ b/src/include/catalog/pg_collation_fn.h @@ -16,8 +16,10 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, + char collprovider, int32 collencoding, - const char *collcollate, const char *collctype); + const char *collcollate, const char *collctype, + int32 collversion); extern void RemoveCollationById(Oid collationOid); #endif /* PG_COLLATION_FN_H */ diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index b621ff2..e379058 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -804,6 +804,9 @@ (--enable-float8-byval) */ #undef USE_FLOAT8_BYVAL +/* Define to build with ICU support. (--with-icu) */ +#undef USE_ICU + /* Define to 1 if you want 64-bit integer timestamp and interval support. (--enable-integer-datetimes) */ #undef USE_INTEGER_DATETIMES diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 0a4b9f7..bb48119 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -16,6 +16,10 @@ #if defined(LOCALE_T_IN_XLOCALE) || defined(WCSTOMBS_L_IN_XLOCALE) #include #endif +#ifdef USE_ICU +#include +#include +#endif #include "utils/guc.h" @@ -62,17 +66,30 @@ extern void cache_locale_time(void); * We define our own wrapper around locale_t so we can keep the same * function signatures for all builds, while not having to create a * fake version of the standard type locale_t in the global namespace. - * The fake version of pg_locale_t can be checked for truth; that's - * about all it will be needed for. + * pg_locale_t is occasionally checked for truth, so make it a pointer. */ +struct pg_locale_t +{ + char provider; #ifdef HAVE_LOCALE_T -typedef locale_t pg_locale_t; -#else -typedef int pg_locale_t; + locale_t lt; +#endif +#ifdef USE_ICU + const char *locale; + UCollator *ucol; + UConverter *ucnv; #endif +}; + +typedef struct pg_locale_t *pg_locale_t; extern pg_locale_t pg_newlocale_from_collation(Oid collid); +#ifdef USE_ICU +extern int32_t icu_to_uchar(pg_locale_t mylocale, UChar **buff_uchar, const char *buff, size_t nbytes); +extern int32_t icu_from_uchar(pg_locale_t mylocale, char **result, UChar *buff_uchar, int32_t len_uchar); +#endif + /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */ #ifdef USE_WIDE_UPPER_LOWER extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen, diff --git a/src/test/regress/GNUmakefile b/src/test/regress/GNUmakefile index 6a275cb..52f095b 100644 --- a/src/test/regress/GNUmakefile +++ b/src/test/regress/GNUmakefile @@ -123,6 +123,9 @@ tablespace-setup: ## REGRESS_OPTS = --dlpath=. $(EXTRA_REGRESS_OPTS) +ifeq ($(with_icu),yes) +EXTRA_TESTS += collate.icu +endif check: all tablespace-setup $(pg_regress_check) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule $(MAXCONNOPT) $(EXTRA_TESTS) diff --git a/src/test/regress/expected/collate.icu.out b/src/test/regress/expected/collate.icu.out new file mode 100644 index 0000000..761bd4b --- /dev/null +++ b/src/test/regress/expected/collate.icu.out @@ -0,0 +1,1076 @@ +/* + * This test is for Linux/glibc systems and assumes that a full set of + * locales is installed. It must be run in a database with utf-8 encoding, + * because other encodings don't support all the characters used. + */ +SET client_encoding TO UTF8; +CREATE TABLE collate_test1 ( + a int, + b text COLLATE "en_US%icu" NOT NULL +); +\d collate_test1 + Table "public.collate_test1" + Column | Type | Modifiers +--------+---------+---------------------------- + a | integer | + b | text | collate en_US%icu not null + +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "ja_JP.eucjp%icu" +); +ERROR: collation "ja_JP.eucjp%icu" for encoding "UTF8" does not exist +LINE 3: b text COLLATE "ja_JP.eucjp%icu" + ^ +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "foo%icu" +); +ERROR: collation "foo%icu" for encoding "UTF8" does not exist +LINE 3: b text COLLATE "foo%icu" + ^ +CREATE TABLE collate_test_fail ( + a int COLLATE "en_US%icu", + b text +); +ERROR: collations are not supported by type integer +LINE 2: a int COLLATE "en_US%icu", + ^ +CREATE TABLE collate_test_like ( + LIKE collate_test1 +); +\d collate_test_like + Table "public.collate_test_like" + Column | Type | Modifiers +--------+---------+---------------------------- + a | integer | + b | text | collate en_US%icu not null + +CREATE TABLE collate_test2 ( + a int, + b text COLLATE "sv_SE%icu" +); +CREATE TABLE collate_test3 ( + a int, + b text COLLATE "C" +); +INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC'); +INSERT INTO collate_test2 SELECT * FROM collate_test1; +INSERT INTO collate_test3 SELECT * FROM collate_test1; +SELECT * FROM collate_test1 WHERE b >= 'bbc'; + a | b +---+----- + 3 | bbc +(1 row) + +SELECT * FROM collate_test2 WHERE b >= 'bbc'; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test3 WHERE b >= 'bbc'; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test3 WHERE b >= 'BBC'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc +(3 rows) + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C"; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C"; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US%icu"; +ERROR: collation mismatch between explicit collations "C" and "en_US%icu" +LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e... + ^ +CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE%icu"; +CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE%icu"; -- fails +ERROR: collations are not supported by type integer +CREATE TABLE collate_test4 ( + a int, + b testdomain_sv +); +INSERT INTO collate_test4 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test4 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +CREATE TABLE collate_test5 ( + a int, + b testdomain_sv COLLATE "en_US%icu" +); +INSERT INTO collate_test5 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test5 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, b FROM collate_test1 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, b FROM collate_test2 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test3 ORDER BY b; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- star expansion +SELECT * FROM collate_test1 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT * FROM collate_test2 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT * FROM collate_test3 ORDER BY b; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- constant expression folding +SELECT 'bbc' COLLATE "en_US%icu" > 'äbc' COLLATE "en_US%icu" AS "true"; + true +------ + t +(1 row) + +SELECT 'bbc' COLLATE "sv_SE%icu" > 'äbc' COLLATE "sv_SE%icu" AS "false"; + false +------- + f +(1 row) + +-- upper/lower +CREATE TABLE collate_test10 ( + a int, + x text COLLATE "en_US%icu", + y text COLLATE "tr_TR%icu" +); +INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ'); +SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10; + a | lower | lower | upper | upper | initcap | initcap +---+-------+-------+-------+-------+---------+--------- + 1 | hij | hij | HIJ | HİJ | Hij | Hij + 2 | hij | hıj | HIJ | HIJ | Hij | Hıj +(2 rows) + +SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10; + a | lower | lower +---+-------+------- + 1 | hij | hij + 2 | hij | hij +(2 rows) + +SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a; + a | x | y +---+-----+----- + 2 | HIJ | HIJ + 1 | hij | hij +(2 rows) + +-- LIKE/ILIKE +SELECT * FROM collate_test1 WHERE b LIKE 'abc'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b LIKE 'abc%'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b LIKE '%bc%'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc +(3 rows) + +SELECT * FROM collate_test1 WHERE b ILIKE 'abc'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ILIKE 'abc%'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ILIKE '%bc%'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC +(4 rows) + +SELECT 'Türkiye' COLLATE "en_US%icu" ILIKE '%KI%' AS "true"; + true +------ + t +(1 row) + +SELECT 'Türkiye' COLLATE "tr_TR%icu" ILIKE '%KI%' AS "false"; + false +------- + f +(1 row) + +SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US%icu" AS "false"; + false +------- + f +(1 row) + +SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR%icu" AS "true"; + true +------ + t +(1 row) + +-- The following actually exercises the selectivity estimation for ILIKE. +SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; + relname +--------- +(0 rows) + +-- regular expressions +SELECT * FROM collate_test1 WHERE b ~ '^abc$'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b ~ '^abc'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b ~ 'bc'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc +(3 rows) + +SELECT * FROM collate_test1 WHERE b ~* '^abc$'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ~* '^abc'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ~* 'bc'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC +(4 rows) + +SELECT 'Türkiye' COLLATE "en_US%icu" ~* 'KI' AS "true"; + true +------ + t +(1 row) + +SELECT 'Türkiye' COLLATE "tr_TR%icu" ~* 'KI' AS "false"; + false +------- + f +(1 row) + +SELECT 'bıt' ~* 'BIT' COLLATE "en_US%icu" AS "false"; + false +------- + f +(1 row) + +SELECT 'bıt' ~* 'BIT' COLLATE "tr_TR%icu" AS "true"; + true +------ + t +(1 row) + +-- The following actually exercises the selectivity estimation for ~*. +SELECT relname FROM pg_class WHERE relname ~* '^abc'; + relname +--------- +(0 rows) + +-- to_char +SET lc_time TO 'tr_TR'; +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); + to_char +------------- + 01 NIS 2010 +(1 row) + +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR%icu"); + to_char +------------- + 01 NİS 2010 +(1 row) + +-- backwards parsing +CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; +CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; +CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10; +SELECT table_name, view_definition FROM information_schema.views + WHERE table_name LIKE 'collview%' ORDER BY 1; + table_name | view_definition +------------+-------------------------------------------------------------------------- + collview1 | SELECT collate_test1.a, + + | collate_test1.b + + | FROM collate_test1 + + | WHERE ((collate_test1.b COLLATE "C") >= 'bbc'::text); + collview2 | SELECT collate_test1.a, + + | collate_test1.b + + | FROM collate_test1 + + | ORDER BY (collate_test1.b COLLATE "C"); + collview3 | SELECT collate_test10.a, + + | lower(((collate_test10.x || collate_test10.x) COLLATE "C")) AS lower+ + | FROM collate_test10; +(3 rows) + +-- collation propagation in various expression types +SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2; + a | coalesce +---+---------- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2; + a | coalesce +---+---------- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2; + a | coalesce +---+---------- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10; + a | lower | lower +---+-------+------- + 1 | hij | hij + 2 | hij | hıj +(2 rows) + +SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3; + a | b | greatest +---+-----+---------- + 1 | abc | CCC + 2 | äbc | CCC + 3 | bbc | CCC + 4 | ABC | CCC +(4 rows) + +SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3; + a | b | greatest +---+-----+---------- + 1 | abc | CCC + 3 | bbc | CCC + 4 | ABC | CCC + 2 | äbc | äbc +(4 rows) + +SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3; + a | b | greatest +---+-----+---------- + 4 | ABC | CCC + 1 | abc | abc + 3 | bbc | bbc + 2 | äbc | äbc +(4 rows) + +SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10; + a | x | y | lower | lower +---+-----+-----+-------+------- + 1 | hij | hij | hij | hij + 2 | HIJ | HIJ | hij | hıj +(2 rows) + +SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2; + a | nullif +---+-------- + 4 | ABC + 2 | äbc + 3 | bbc + 1 | +(4 rows) + +SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2; + a | nullif +---+-------- + 4 | ABC + 3 | bbc + 2 | äbc + 1 | +(4 rows) + +SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2; + a | nullif +---+-------- + 4 | ABC + 3 | bbc + 2 | äbc + 1 | +(4 rows) + +SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10; + a | lower | lower +---+-------+------- + 1 | hij | hij + 2 | hij | hıj +(2 rows) + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2; + a | b +---+------ + 4 | ABC + 2 | äbc + 1 | abcd + 3 | bbc +(4 rows) + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2; + a | b +---+------ + 4 | ABC + 1 | abcd + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2; + a | b +---+------ + 4 | ABC + 1 | abcd + 3 | bbc + 2 | äbc +(4 rows) + +CREATE DOMAIN testdomain AS text; +SELECT a, b::testdomain FROM collate_test1 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, b::testdomain FROM collate_test2 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b::testdomain FROM collate_test3 ORDER BY 2; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10; + a | lower | lower +---+-------+------- + 1 | hij | hij + 2 | hij | hıj +(2 rows) + +SELECT min(b), max(b) FROM collate_test1; + min | max +-----+----- + abc | bbc +(1 row) + +SELECT min(b), max(b) FROM collate_test2; + min | max +-----+----- + abc | äbc +(1 row) + +SELECT min(b), max(b) FROM collate_test3; + min | max +-----+----- + ABC | äbc +(1 row) + +SELECT array_agg(b ORDER BY b) FROM collate_test1; + array_agg +------------------- + {abc,ABC,äbc,bbc} +(1 row) + +SELECT array_agg(b ORDER BY b) FROM collate_test2; + array_agg +------------------- + {abc,ABC,bbc,äbc} +(1 row) + +SELECT array_agg(b ORDER BY b) FROM collate_test3; + array_agg +------------------- + {ABC,abc,bbc,äbc} +(1 row) + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2; + a | b +---+----- + 1 | abc + 1 | abc + 4 | ABC + 4 | ABC + 2 | äbc + 2 | äbc + 3 | bbc + 3 | bbc +(8 rows) + +SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2; + a | b +---+----- + 3 | bbc + 2 | äbc +(2 rows) + +SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2; + a | b +---+----- + 4 | ABC + 3 | bbc + 2 | äbc +(3 rows) + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: could not determine which collation to use for string comparison +HINT: Use the COLLATE clause to set the collation explicitly. +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC +(8 rows) + +SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: collation mismatch between implicit collations "en_US%icu" and "C" +LINE 1: SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collat... + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: collation mismatch between implicit collations "en_US%icu" and "C" +LINE 1: ...ELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM col... + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: collation mismatch between implicit collations "en_US%icu" and "C" +LINE 1: SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM colla... + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail +ERROR: no collation was derived for column "b" with collatable type text +HINT: Use the COLLATE clause to set the collation explicitly. +-- ideally this would be a parse-time error, but for now it must be run-time: +select x < y from collate_test10; -- fail +ERROR: could not determine which collation to use for string comparison +HINT: Use the COLLATE clause to set the collation explicitly. +select x || y from collate_test10; -- ok, because || is not collation aware + ?column? +---------- + hijhij + HIJHIJ +(2 rows) + +select x, y from collate_test10 order by x || y; -- not so ok +ERROR: collation mismatch between implicit collations "en_US%icu" and "tr_TR%icu" +LINE 1: select x, y from collate_test10 order by x || y; + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +-- collation mismatch between recursive and non-recursive term +WITH RECURSIVE foo(x) AS + (SELECT x FROM (VALUES('a' COLLATE "en_US%icu"),('b')) t(x) + UNION ALL + SELECT (x || 'c') COLLATE "de_DE%icu" FROM foo WHERE length(x) < 10) +SELECT * FROM foo; +ERROR: recursive query "foo" column 1 has collation "en_US%icu" in non-recursive term but collation "de_DE%icu" overall +LINE 2: (SELECT x FROM (VALUES('a' COLLATE "en_US%icu"),('b')) t(... + ^ +HINT: Use the COLLATE clause to set the collation of the non-recursive term. +-- casting +SELECT CAST('42' AS text COLLATE "C"); +ERROR: syntax error at or near "COLLATE" +LINE 1: SELECT CAST('42' AS text COLLATE "C"); + ^ +SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- propagation of collation in SQL functions (inlined and non-inlined cases) +-- and plpgsql functions too +CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 $$; +CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 limit 1 $$; +CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql + AS $$ begin return $1 < $2; end $$; +SELECT a.b AS a, b.b AS b, a.b < b.b AS lt, + mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b) +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + a | b | lt | mylt | mylt_noninline | mylt_plpgsql +-----+-----+----+------+----------------+-------------- + abc | abc | f | f | f | f + abc | ABC | t | t | t | t + abc | äbc | t | t | t | t + abc | bbc | t | t | t | t + ABC | abc | f | f | f | f + ABC | ABC | f | f | f | f + ABC | äbc | t | t | t | t + ABC | bbc | t | t | t | t + äbc | abc | f | f | f | f + äbc | ABC | f | f | f | f + äbc | äbc | f | f | f | f + äbc | bbc | t | t | t | t + bbc | abc | f | f | f | f + bbc | ABC | f | f | f | f + bbc | äbc | f | f | f | f + bbc | bbc | f | f | f | f +(16 rows) + +SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt, + mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"), + mylt_plpgsql(a.b, b.b COLLATE "C") +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + a | b | lt | mylt | mylt_noninline | mylt_plpgsql +-----+-----+----+------+----------------+-------------- + abc | abc | f | f | f | f + abc | ABC | f | f | f | f + abc | äbc | t | t | t | t + abc | bbc | t | t | t | t + ABC | abc | t | t | t | t + ABC | ABC | f | f | f | f + ABC | äbc | t | t | t | t + ABC | bbc | t | t | t | t + äbc | abc | f | f | f | f + äbc | ABC | f | f | f | f + äbc | äbc | f | f | f | f + äbc | bbc | f | f | f | f + bbc | abc | f | f | f | f + bbc | ABC | f | f | f | f + bbc | äbc | t | t | t | t + bbc | bbc | f | f | f | f +(16 rows) + +-- collation override in plpgsql +CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text := x; + yy text := y; +begin + return xx < yy; +end +$$; +SELECT mylt2('a', 'B' collate "en_US") as t, mylt2('a', 'B' collate "C") as f; + t | f +---+--- + t | f +(1 row) + +CREATE OR REPLACE FUNCTION + mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text COLLATE "POSIX" := x; + yy text := y; +begin + return xx < yy; +end +$$; +SELECT mylt2('a', 'B') as f; + f +--- + f +(1 row) + +SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations +ERROR: could not determine which collation to use for string comparison +HINT: Use the COLLATE clause to set the collation explicitly. +CONTEXT: PL/pgSQL function mylt2(text,text) line 6 at RETURN +SELECT mylt2('a', 'B' collate "POSIX") as f; + f +--- + f +(1 row) + +-- polymorphism +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1; + unnest +-------- + abc + ABC + äbc + bbc +(4 rows) + +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1; + unnest +-------- + abc + ABC + bbc + äbc +(4 rows) + +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1; + unnest +-------- + ABC + abc + bbc + äbc +(4 rows) + +CREATE FUNCTION dup (anyelement) RETURNS anyelement + AS 'select $1' LANGUAGE sql; +SELECT a, dup(b) FROM collate_test1 ORDER BY 2; + a | dup +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, dup(b) FROM collate_test2 ORDER BY 2; + a | dup +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, dup(b) FROM collate_test3 ORDER BY 2; + a | dup +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- indexes +CREATE INDEX collate_test1_idx1 ON collate_test1 (b); +CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C"); +CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically +CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX")); +CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail +ERROR: collations are not supported by type integer +CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail +ERROR: collations are not supported by type integer +LINE 1: ...ATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C... + ^ +SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1; + relname | pg_get_indexdef +--------------------+----------------------------------------------------------------------------------------------------- + collate_test1_idx1 | CREATE INDEX collate_test1_idx1 ON collate_test1 USING btree (b) + collate_test1_idx2 | CREATE INDEX collate_test1_idx2 ON collate_test1 USING btree (b COLLATE "C") + collate_test1_idx3 | CREATE INDEX collate_test1_idx3 ON collate_test1 USING btree (b COLLATE "C") + collate_test1_idx4 | CREATE INDEX collate_test1_idx4 ON collate_test1 USING btree (((b || 'foo'::text)) COLLATE "POSIX") +(4 rows) + +-- schema manipulation commands +CREATE ROLE regress_test_role; +CREATE SCHEMA test_schema; +-- We need to do this this way to cope with varying names for encodings: +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test0 (locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; +END +$$; +CREATE COLLATION test0 FROM "C"; -- fail, duplicate name +ERROR: collation "test0" for encoding "UTF8" already exists +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test1 (lc_collate = ' || + quote_literal(current_setting('lc_collate')) || + ', lc_ctype = ' || + quote_literal(current_setting('lc_ctype')) || ');'; +END +$$; +CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype +ERROR: parameter "lc_ctype" must be specified +CREATE COLLATION testx (locale = 'nonsense'); -- fail +ERROR: could not create locale "nonsense": No such file or directory +DETAIL: The operating system could not find any locale data for the locale name "nonsense". +CREATE COLLATION test4 FROM nonsense; +ERROR: collation "nonsense" for encoding "UTF8" does not exist +CREATE COLLATION test5 FROM test0; +SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; + collname +---------- + test0 + test1 + test5 +(3 rows) + +ALTER COLLATION test1 RENAME TO test11; +ALTER COLLATION test0 RENAME TO test11; -- fail +ERROR: collation "test11" for encoding "UTF8" already exists in schema "public" +ALTER COLLATION test1 RENAME TO test22; -- fail +ERROR: collation "test1" for encoding "UTF8" does not exist +ALTER COLLATION test11 OWNER TO regress_test_role; +ALTER COLLATION test11 OWNER TO nonsense; +ERROR: role "nonsense" does not exist +ALTER COLLATION test11 SET SCHEMA test_schema; +COMMENT ON COLLATION test0 IS 'US English'; +SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation') + FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid) + WHERE collname LIKE 'test%' + ORDER BY 1; + collname | nspname | obj_description +----------+-------------+----------------- + test0 | public | US English + test11 | test_schema | + test5 | public | +(3 rows) + +DROP COLLATION test0, test_schema.test11, test5; +DROP COLLATION test0; -- fail +ERROR: collation "test0" for encoding "UTF8" does not exist +DROP COLLATION IF EXISTS test0; +NOTICE: collation "test0" does not exist, skipping +SELECT collname FROM pg_collation WHERE collname LIKE 'test%'; + collname +---------- +(0 rows) + +DROP SCHEMA test_schema; +DROP ROLE regress_test_role; +-- dependencies +CREATE COLLATION test0 FROM "C"; +CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0); +CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0; +CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0); +CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo; +CREATE TABLE collate_dep_test4t (a int, b text); +CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); +DROP COLLATION test0 RESTRICT; -- fail +ERROR: cannot drop collation test0 because other objects depend on it +DETAIL: table collate_dep_test1 column b depends on collation test0 +type collate_dep_dom1 depends on collation test0 +composite type collate_dep_test2 column y depends on collation test0 +view collate_dep_test3 depends on collation test0 +index collate_dep_test4i depends on collation test0 +HINT: Use DROP ... CASCADE to drop the dependent objects too. +DROP COLLATION test0 CASCADE; +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table collate_dep_test1 column b +drop cascades to type collate_dep_dom1 +drop cascades to composite type collate_dep_test2 column y +drop cascades to view collate_dep_test3 +drop cascades to index collate_dep_test4i +\d collate_dep_test1 +Table "public.collate_dep_test1" + Column | Type | Modifiers +--------+---------+----------- + a | integer | + +\d collate_dep_test2 +Composite type "public.collate_dep_test2" + Column | Type | Modifiers +--------+---------+----------- + x | integer | + +DROP TABLE collate_dep_test1, collate_dep_test4t; +DROP TYPE collate_dep_test2; +-- test range types and collations +create type textrange_c as range(subtype=text, collation="C"); +create type textrange_en_us as range(subtype=text, collation="en_US%icu"); +select textrange_c('A','Z') @> 'b'::text; + ?column? +---------- + f +(1 row) + +select textrange_en_us('A','Z') @> 'b'::text; + ?column? +---------- + t +(1 row) + +drop type textrange_c; +drop type textrange_en_us; diff --git a/src/test/regress/sql/collate.icu.sql b/src/test/regress/sql/collate.icu.sql new file mode 100644 index 0000000..874d2f8 --- /dev/null +++ b/src/test/regress/sql/collate.icu.sql @@ -0,0 +1,398 @@ +/* + * This test is for Linux/glibc systems and assumes that a full set of + * locales is installed. It must be run in a database with utf-8 encoding, + * because other encodings don't support all the characters used. + */ + +SET client_encoding TO UTF8; + + +CREATE TABLE collate_test1 ( + a int, + b text COLLATE "en_US%icu" NOT NULL +); + +\d collate_test1 + +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "ja_JP.eucjp%icu" +); + +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "foo%icu" +); + +CREATE TABLE collate_test_fail ( + a int COLLATE "en_US%icu", + b text +); + +CREATE TABLE collate_test_like ( + LIKE collate_test1 +); + +\d collate_test_like + +CREATE TABLE collate_test2 ( + a int, + b text COLLATE "sv_SE%icu" +); + +CREATE TABLE collate_test3 ( + a int, + b text COLLATE "C" +); + +INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC'); +INSERT INTO collate_test2 SELECT * FROM collate_test1; +INSERT INTO collate_test3 SELECT * FROM collate_test1; + +SELECT * FROM collate_test1 WHERE b >= 'bbc'; +SELECT * FROM collate_test2 WHERE b >= 'bbc'; +SELECT * FROM collate_test3 WHERE b >= 'bbc'; +SELECT * FROM collate_test3 WHERE b >= 'BBC'; + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; +SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C"; +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C"; +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en_US%icu"; + + +CREATE DOMAIN testdomain_sv AS text COLLATE "sv_SE%icu"; +CREATE DOMAIN testdomain_i AS int COLLATE "sv_SE%icu"; -- fails +CREATE TABLE collate_test4 ( + a int, + b testdomain_sv +); +INSERT INTO collate_test4 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test4 ORDER BY b; + +CREATE TABLE collate_test5 ( + a int, + b testdomain_sv COLLATE "en_US%icu" +); +INSERT INTO collate_test5 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test5 ORDER BY b; + + +SELECT a, b FROM collate_test1 ORDER BY b; +SELECT a, b FROM collate_test2 ORDER BY b; +SELECT a, b FROM collate_test3 ORDER BY b; + +SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; + +-- star expansion +SELECT * FROM collate_test1 ORDER BY b; +SELECT * FROM collate_test2 ORDER BY b; +SELECT * FROM collate_test3 ORDER BY b; + +-- constant expression folding +SELECT 'bbc' COLLATE "en_US%icu" > 'äbc' COLLATE "en_US%icu" AS "true"; +SELECT 'bbc' COLLATE "sv_SE%icu" > 'äbc' COLLATE "sv_SE%icu" AS "false"; + +-- upper/lower + +CREATE TABLE collate_test10 ( + a int, + x text COLLATE "en_US%icu", + y text COLLATE "tr_TR%icu" +); + +INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ'); + +SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10; +SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10; + +SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a; + +-- LIKE/ILIKE + +SELECT * FROM collate_test1 WHERE b LIKE 'abc'; +SELECT * FROM collate_test1 WHERE b LIKE 'abc%'; +SELECT * FROM collate_test1 WHERE b LIKE '%bc%'; +SELECT * FROM collate_test1 WHERE b ILIKE 'abc'; +SELECT * FROM collate_test1 WHERE b ILIKE 'abc%'; +SELECT * FROM collate_test1 WHERE b ILIKE '%bc%'; + +SELECT 'Türkiye' COLLATE "en_US%icu" ILIKE '%KI%' AS "true"; +SELECT 'Türkiye' COLLATE "tr_TR%icu" ILIKE '%KI%' AS "false"; + +SELECT 'bıt' ILIKE 'BIT' COLLATE "en_US%icu" AS "false"; +SELECT 'bıt' ILIKE 'BIT' COLLATE "tr_TR%icu" AS "true"; + +-- The following actually exercises the selectivity estimation for ILIKE. +SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; + +-- regular expressions + +SELECT * FROM collate_test1 WHERE b ~ '^abc$'; +SELECT * FROM collate_test1 WHERE b ~ '^abc'; +SELECT * FROM collate_test1 WHERE b ~ 'bc'; +SELECT * FROM collate_test1 WHERE b ~* '^abc$'; +SELECT * FROM collate_test1 WHERE b ~* '^abc'; +SELECT * FROM collate_test1 WHERE b ~* 'bc'; + +SELECT 'Türkiye' COLLATE "en_US%icu" ~* 'KI' AS "true"; +SELECT 'Türkiye' COLLATE "tr_TR%icu" ~* 'KI' AS "false"; + +SELECT 'bıt' ~* 'BIT' COLLATE "en_US%icu" AS "false"; +SELECT 'bıt' ~* 'BIT' COLLATE "tr_TR%icu" AS "true"; + +-- The following actually exercises the selectivity estimation for ~*. +SELECT relname FROM pg_class WHERE relname ~* '^abc'; + + +-- to_char + +SET lc_time TO 'tr_TR'; +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR%icu"); + + +-- backwards parsing + +CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; +CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; +CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10; + +SELECT table_name, view_definition FROM information_schema.views + WHERE table_name LIKE 'collview%' ORDER BY 1; + + +-- collation propagation in various expression types + +SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2; +SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2; +SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2; +SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10; + +SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3; +SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3; +SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3; +SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10; + +SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2; +SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2; +SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2; +SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10; + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2; +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2; +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2; + +CREATE DOMAIN testdomain AS text; +SELECT a, b::testdomain FROM collate_test1 ORDER BY 2; +SELECT a, b::testdomain FROM collate_test2 ORDER BY 2; +SELECT a, b::testdomain FROM collate_test3 ORDER BY 2; +SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2; +SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10; + +SELECT min(b), max(b) FROM collate_test1; +SELECT min(b), max(b) FROM collate_test2; +SELECT min(b), max(b) FROM collate_test3; + +SELECT array_agg(b ORDER BY b) FROM collate_test1; +SELECT array_agg(b ORDER BY b) FROM collate_test2; +SELECT array_agg(b ORDER BY b) FROM collate_test3; + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2; +SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2; +SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2; +SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2; + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok +SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok +SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail + +CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail + +-- ideally this would be a parse-time error, but for now it must be run-time: +select x < y from collate_test10; -- fail +select x || y from collate_test10; -- ok, because || is not collation aware +select x, y from collate_test10 order by x || y; -- not so ok + +-- collation mismatch between recursive and non-recursive term +WITH RECURSIVE foo(x) AS + (SELECT x FROM (VALUES('a' COLLATE "en_US%icu"),('b')) t(x) + UNION ALL + SELECT (x || 'c') COLLATE "de_DE%icu" FROM foo WHERE length(x) < 10) +SELECT * FROM foo; + + +-- casting + +SELECT CAST('42' AS text COLLATE "C"); + +SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2; +SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2; +SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2; + + +-- propagation of collation in SQL functions (inlined and non-inlined cases) +-- and plpgsql functions too + +CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 $$; + +CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 limit 1 $$; + +CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql + AS $$ begin return $1 < $2; end $$; + +SELECT a.b AS a, b.b AS b, a.b < b.b AS lt, + mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b) +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + +SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt, + mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"), + mylt_plpgsql(a.b, b.b COLLATE "C") +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + + +-- collation override in plpgsql + +CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text := x; + yy text := y; +begin + return xx < yy; +end +$$; + +SELECT mylt2('a', 'B' collate "en_US") as t, mylt2('a', 'B' collate "C") as f; + +CREATE OR REPLACE FUNCTION + mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text COLLATE "POSIX" := x; + yy text := y; +begin + return xx < yy; +end +$$; + +SELECT mylt2('a', 'B') as f; +SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations +SELECT mylt2('a', 'B' collate "POSIX") as f; + + +-- polymorphism + +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1; +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1; +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1; + +CREATE FUNCTION dup (anyelement) RETURNS anyelement + AS 'select $1' LANGUAGE sql; + +SELECT a, dup(b) FROM collate_test1 ORDER BY 2; +SELECT a, dup(b) FROM collate_test2 ORDER BY 2; +SELECT a, dup(b) FROM collate_test3 ORDER BY 2; + + +-- indexes + +CREATE INDEX collate_test1_idx1 ON collate_test1 (b); +CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C"); +CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically +CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX")); + +CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail +CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail + +SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1; + + +-- schema manipulation commands + +CREATE ROLE regress_test_role; +CREATE SCHEMA test_schema; + +-- We need to do this this way to cope with varying names for encodings: +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test0 (locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; +END +$$; +CREATE COLLATION test0 FROM "C"; -- fail, duplicate name +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test1 (lc_collate = ' || + quote_literal(current_setting('lc_collate')) || + ', lc_ctype = ' || + quote_literal(current_setting('lc_ctype')) || ');'; +END +$$; +CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype +CREATE COLLATION testx (locale = 'nonsense'); -- fail + +CREATE COLLATION test4 FROM nonsense; +CREATE COLLATION test5 FROM test0; + +SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; + +ALTER COLLATION test1 RENAME TO test11; +ALTER COLLATION test0 RENAME TO test11; -- fail +ALTER COLLATION test1 RENAME TO test22; -- fail + +ALTER COLLATION test11 OWNER TO regress_test_role; +ALTER COLLATION test11 OWNER TO nonsense; +ALTER COLLATION test11 SET SCHEMA test_schema; + +COMMENT ON COLLATION test0 IS 'US English'; + +SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation') + FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid) + WHERE collname LIKE 'test%' + ORDER BY 1; + +DROP COLLATION test0, test_schema.test11, test5; +DROP COLLATION test0; -- fail +DROP COLLATION IF EXISTS test0; + +SELECT collname FROM pg_collation WHERE collname LIKE 'test%'; + +DROP SCHEMA test_schema; +DROP ROLE regress_test_role; + + +-- dependencies + +CREATE COLLATION test0 FROM "C"; + +CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0); +CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0; +CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0); +CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo; +CREATE TABLE collate_dep_test4t (a int, b text); +CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); + +DROP COLLATION test0 RESTRICT; -- fail +DROP COLLATION test0 CASCADE; + +\d collate_dep_test1 +\d collate_dep_test2 + +DROP TABLE collate_dep_test1, collate_dep_test4t; +DROP TYPE collate_dep_test2; + +-- test range types and collations + +create type textrange_c as range(subtype=text, collation="C"); +create type textrange_en_us as range(subtype=text, collation="en_US%icu"); + +select textrange_c('A','Z') @> 'b'::text; +select textrange_en_us('A','Z') @> 'b'::text; + +drop type textrange_c; +drop type textrange_en_us;