svn_utf.h

Go to the documentation of this file.
00001 /**
00002  * @copyright
00003  * ====================================================================
00004  *    Licensed to the Apache Software Foundation (ASF) under one
00005  *    or more contributor license agreements.  See the NOTICE file
00006  *    distributed with this work for additional information
00007  *    regarding copyright ownership.  The ASF licenses this file
00008  *    to you under the Apache License, Version 2.0 (the
00009  *    "License"); you may not use this file except in compliance
00010  *    with the License.  You may obtain a copy of the License at
00011  *
00012  *      http://www.apache.org/licenses/LICENSE-2.0
00013  *
00014  *    Unless required by applicable law or agreed to in writing,
00015  *    software distributed under the License is distributed on an
00016  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
00017  *    KIND, either express or implied.  See the License for the
00018  *    specific language governing permissions and limitations
00019  *    under the License.
00020  * ====================================================================
00021  * @endcopyright
00022  *
00023  * @file svn_utf.h
00024  * @brief UTF-8 conversion routines
00025  *
00026  * Whenever a conversion routine cannot convert to or from UTF-8, the
00027  * error returned has code @c APR_EINVAL.
00028  */
00029 
00030 
00031 
00032 #ifndef SVN_UTF_H
00033 #define SVN_UTF_H
00034 
00035 #include <apr_pools.h>
00036 #include <apr_xlate.h>  /* for APR_*_CHARSET */
00037 
00038 #include "svn_types.h"
00039 #include "svn_string.h"
00040 
00041 #ifdef __cplusplus
00042 extern "C" {
00043 #endif /* __cplusplus */
00044 
00045 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET /* svn-prefixed alias for APR_LOCALE_CHARSET (see the <apr_xlate.h> include) */
00046 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET /* svn-prefixed alias for APR_DEFAULT_CHARSET (see the <apr_xlate.h> include) */
00047 
00048 /**
00049  * Initialize the UTF-8 encoding/decoding routines, allocating the
00050  * cached translation handles in a subpool of @a pool.
00051  *
00052  * @param assume_native_utf8  If TRUE, the native character set is
00053  *        assumed to be UTF-8, i.e. conversion is a no-op.  This is useful
00054  *        in contexts where the native character set is ASCII but UTF-8
00055  *        should be used regardless (e.g. for mod_dav_svn, which runs
00056  *        within httpd and always uses the "C" locale).
00057  * @param pool  Pool in whose subpool the cached handles are allocated.
00058  *
00059  * @note Calling this function is optional, but doing so improves
00060  * performance.  If it is called, no other svn function may be in use in
00061  * other threads during the call or when @a pool is cleared or destroyed.
00062  *
00063  * @since New in 1.8.
00064  */
00065 void
00066 svn_utf_initialize2(svn_boolean_t assume_native_utf8,
00067                     apr_pool_t *pool);
00068 
00069 /**
00070  * Like svn_utf_initialize2(), but takes only @a pool and therefore
00071  * offers no way to force the native encoding to UTF-8.
00072  *
00073  * @deprecated Provided for backward compatibility with the 1.7 API.
00074  */
00075 SVN_DEPRECATED
00076 void
00077 svn_utf_initialize(apr_pool_t *pool);
00078 
00079 /** Convert the natively-encoded stringbuf @a src to UTF-8 and set
00080  * @a *dest to the resulting stringbuf, allocated in @a pool.
       *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
00081  */
00082 svn_error_t *
00083 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
00084                           const svn_stringbuf_t *src,
00085                           apr_pool_t *pool);
00086 
00087 
00088 /** Convert the natively-encoded string @a src to UTF-8 and set
00089  * @a *dest to the resulting string, allocated in @a pool.
       *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
00090  */
00091 svn_error_t *
00092 svn_utf_string_to_utf8(const svn_string_t **dest,
00093                        const svn_string_t *src,
00094                        apr_pool_t *pool);
00095 
00096 
00097 /** Convert the natively-encoded C string @a src to UTF-8 and set
00098  * @a *dest to the resulting C string, allocated in @a pool.
       *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
00099  */
00100 svn_error_t *
00101 svn_utf_cstring_to_utf8(const char **dest,
00102                         const char *src,
00103                         apr_pool_t *pool);
00104 
00105 
00106 /** Convert the C string @a src, which is encoded in @a frompage, to
00107  * UTF-8 and set @a *dest to the resulting C string, allocated in @a pool.
00108  *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
       *
00109  * @since New in 1.4.
00110  */
00111 svn_error_t *
00112 svn_utf_cstring_to_utf8_ex2(const char **dest,
00113                             const char *src,
00114                             const char *frompage,
00115                             apr_pool_t *pool);
00116 
00117 
00118 /** Like svn_utf_cstring_to_utf8_ex2(), but takes an additional
00119  * @a convset_key argument, which is ignored.
00120  *
00121  * @deprecated Provided for backward compatibility with the 1.3 API.
00122  */
00123 SVN_DEPRECATED
00124 svn_error_t *
00125 svn_utf_cstring_to_utf8_ex(const char **dest,
00126                            const char *src,
00127                            const char *frompage,
00128                            const char *convset_key,
00129                            apr_pool_t *pool);
00130 
00131 
00132 /** Convert the UTF-8 stringbuf @a src to the native encoding and set
00133  * @a *dest to the resulting stringbuf, allocated in @a pool.
       *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
00134  */
00135 svn_error_t *
00136 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
00137                             const svn_stringbuf_t *src,
00138                             apr_pool_t *pool);
00139 
00140 
00141 /** Convert the UTF-8 string @a src to the native encoding and set
00142  * @a *dest to the resulting string, allocated in @a pool.
       *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
00143  */
00144 svn_error_t *
00145 svn_utf_string_from_utf8(const svn_string_t **dest,
00146                          const svn_string_t *src,
00147                          apr_pool_t *pool);
00148 
00149 
00150 /** Convert the UTF-8 C string @a src to the native encoding and set
00151  * @a *dest to the resulting C string, allocated in @a pool.
       *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
00152  */
00153 svn_error_t *
00154 svn_utf_cstring_from_utf8(const char **dest,
00155                           const char *src,
00156                           apr_pool_t *pool);
00157 
00158 
00159 /** Convert the UTF-8 encoded C string @a src to the @a topage encoding
00160  * and set @a *dest to the resulting C string, allocated in @a pool.
00161  *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
       *
00162  * @since New in 1.4.
00163  */
00164 svn_error_t *
00165 svn_utf_cstring_from_utf8_ex2(const char **dest,
00166                               const char *src,
00167                               const char *topage,
00168                               apr_pool_t *pool);
00169 
00170 
00171 /** Like svn_utf_cstring_from_utf8_ex2(), but takes an additional
00172  * @a convset_key argument, which is ignored.
00173  *
00174  * @deprecated Provided for backward compatibility with the 1.3 API.
00175  */
00176 SVN_DEPRECATED
00177 svn_error_t *
00178 svn_utf_cstring_from_utf8_ex(const char **dest,
00179                              const char *src,
00180                              const char *topage,
00181                              const char *convset_key,
00182                              apr_pool_t *pool);
00183 
00184 
00185 /** Return a fuzzily native-encoded C string from UTF-8 C string @a src,
00186  * allocated in @a pool.  A fuzzy recoding leaves all 7-bit ASCII
00187  * characters the same, and substitutes "?\\XXX" for others, where XXX
00188  * is the unsigned decimal code for that character.
00189  *
00190  * This function cannot error; it is guaranteed to return something.
00191  * First it will recode as described above and then attempt to convert
00192  * the (new) 7-bit UTF-8 string to native encoding.  If that fails, it
00193  * will return the raw fuzzily recoded string, which may or may not be
00194  * meaningful in the client's locale, but is (presumably) better than
00195  * nothing.
00196  *
00197  * ### Notes:
00198  *
00199  * Improvement is possible, even imminent.  The original problem was
00200  * that if you converted a UTF-8 string (say, a log message) into a
00201  * locale that couldn't represent all the characters, you'd just get a
00202  * static placeholder saying "[unconvertible log message]".  Then
00203  * Justin Erenkrantz pointed out how on platforms that didn't support
00204  * conversion at all, "svn log" would still fail completely when it
00205  * encountered unconvertible data.
00206  *
00207  * Now for both cases, the caller can at least fall back on this
00208  * function, which converts the message as best it can, substituting
00209  * "?\\XXX" escape codes for the non-ASCII characters.
00210  *
00211  * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
00212  * so when we can detect that at configure time, things will change.
00213  * Also, this should (?) be moved to apr/apu eventually.
00214  *
00215  * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
00216  * details.
       *
       * NOTE(review): the tigris.org tracker may no longer be reachable; this
       * issue is believed to live on as SVN-807 in the Apache JIRA
       * (https://issues.apache.org/jira/browse/SVN-807) -- confirm before
       * replacing the link above.
00217  */
00218 const char *
00219 svn_utf_cstring_from_utf8_fuzzy(const char *src,
00220                                 apr_pool_t *pool);
00221 
00222 
00223 /** Convert the UTF-8 stringbuf @a src to the native encoding and set
00224  * @a *dest to the resulting C string, allocated in @a pool.
       *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
00225  */
00226 svn_error_t *
00227 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
00228                                     const svn_stringbuf_t *src,
00229                                     apr_pool_t *pool);
00230 
00231 
00232 /** Convert the UTF-8 string @a src to the native encoding and set
00233  * @a *dest to the resulting C string, allocated in @a pool.
       *
       * @return An error if the conversion fails; per the note at the top of
       * this file, such an error has code @c APR_EINVAL.
00234  */
00235 svn_error_t *
00236 svn_utf_cstring_from_utf8_string(const char **dest,
00237                                  const svn_string_t *src,
00238                                  apr_pool_t *pool);
00239 
00240 /** Return the display width of the UTF-8-encoded C string @a cstr,
00241  * or -1 if the string is not printable or is not valid UTF-8.
00242  *
00243  * @since New in 1.8.
00244  */
00245 int
00246 svn_utf_cstring_utf8_width(const char *cstr);
00247 
00248 #ifdef __cplusplus
00249 }
00250 #endif /* __cplusplus */
00251 
00252 #endif /* SVN_UTF_H */

Generated on Thu Aug 10 22:21:59 2017 for Subversion by  doxygen 1.4.7