The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/**
 * @copyright
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 * @endcopyright
 *
 * @file svn_utf.h
 * @brief UTF-8 conversion routines
 *
 * Whenever a conversion routine cannot convert to or from UTF-8, the
 * error returned has code @c APR_EINVAL.
 */



#ifndef SVN_UTF_H
#define SVN_UTF_H

#include <apr_pools.h>
#include <apr_xlate.h>  /* for APR_*_CHARSET */

#include "svn_types.h"
#include "svn_string.h"

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

#define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET
#define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET

/**
 * Initialize the UTF-8 encoding/decoding routines.
 * Allocate cached translation handles in a subpool of @a pool.
 *
 * If @a assume_native_utf8 is TRUE, the native character set is
 * assumed to be UTF-8, i.e. conversion is a no-op. This is useful
 * in contexts where the native character set is ASCII but UTF-8
 * should be used regardless (e.g. for mod_dav_svn which runs within
 * httpd and always uses the "C" locale).
 *
 * @note It is optional to call this function, but if it is used, no other
 * svn function may be in use in other threads during the call of this
 * function or when @a pool is cleared or destroyed.
 * Initializing the UTF-8 routines will improve performance.
 *
 * @since New in 1.8.
 */
void
svn_utf_initialize2(svn_boolean_t assume_native_utf8,
                    apr_pool_t *pool);

/**
 * Like svn_utf_initialize2() but without the ability to force the
 * native encoding to UTF-8.
 *
 * @deprecated Provided for backward compatibility with the 1.7 API.
 */
SVN_DEPRECATED
void
svn_utf_initialize(apr_pool_t *pool);

/** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src;
 * allocate @a *dest in @a pool.
 */
svn_error_t *
svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
                          const svn_stringbuf_t *src,
                          apr_pool_t *pool);


/** Set @a *dest to a utf8-encoded string from native string @a src; allocate
 * @a *dest in @a pool.
 */
svn_error_t *
svn_utf_string_to_utf8(const svn_string_t **dest,
                       const svn_string_t *src,
                       apr_pool_t *pool);


/** Set @a *dest to a utf8-encoded C string from native C string @a src;
 * allocate @a *dest in @a pool.
 */
svn_error_t *
svn_utf_cstring_to_utf8(const char **dest,
                        const char *src,
                        apr_pool_t *pool);


/** Set @a *dest to a utf8 encoded C string from @a frompage encoded C
 * string @a src; allocate @a *dest in @a pool.
 *
 * @since New in 1.4.
 */
svn_error_t *
svn_utf_cstring_to_utf8_ex2(const char **dest,
                            const char *src,
                            const char *frompage,
                            apr_pool_t *pool);


/** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is
 * ignored.
 *
 * @deprecated Provided for backward compatibility with the 1.3 API.
 */
SVN_DEPRECATED
svn_error_t *
svn_utf_cstring_to_utf8_ex(const char **dest,
                           const char *src,
                           const char *frompage,
                           const char *convset_key,
                           apr_pool_t *pool);


/** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src;
 * allocate @a *dest in @a pool.
 */
svn_error_t *
svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
                            const svn_stringbuf_t *src,
                            apr_pool_t *pool);


/** Set @a *dest to a natively-encoded string from utf8 string @a src;
 * allocate @a *dest in @a pool.
 */
svn_error_t *
svn_utf_string_from_utf8(const svn_string_t **dest,
                         const svn_string_t *src,
                         apr_pool_t *pool);


/** Set @a *dest to a natively-encoded C string from utf8 C string @a src;
 * allocate @a *dest in @a pool.
 */
svn_error_t *
svn_utf_cstring_from_utf8(const char **dest,
                          const char *src,
                          apr_pool_t *pool);


/** Set @a *dest to a @a topage encoded C string from utf8 encoded C string
 * @a src; allocate @a *dest in @a pool.
 *
 * @since New in 1.4.
 */
svn_error_t *
svn_utf_cstring_from_utf8_ex2(const char **dest,
                              const char *src,
                              const char *topage,
                              apr_pool_t *pool);


/** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is
 * ignored.
 *
 * @deprecated Provided for backward compatibility with the 1.3 API.
 */
SVN_DEPRECATED
svn_error_t *
svn_utf_cstring_from_utf8_ex(const char **dest,
                             const char *src,
                             const char *topage,
                             const char *convset_key,
                             apr_pool_t *pool);


/** Return a fuzzily native-encoded C string from utf8 C string @a src,
 * allocated in @a pool.  A fuzzy recoding leaves all 7-bit ascii
 * characters the same, and substitutes "?\\XXX" for others, where XXX
 * is the unsigned decimal code for that character.
 *
 * This function cannot error; it is guaranteed to return something.
 * First it will recode as described above and then attempt to convert
 * the (new) 7-bit UTF-8 string to native encoding.  If that fails, it
 * will return the raw fuzzily recoded string, which may or may not be
 * meaningful in the client's locale, but is (presumably) better than
 * nothing.
 *
 * ### Notes:
 *
 * Improvement is possible, even imminent.  The original problem was
 * that if you converted a UTF-8 string (say, a log message) into a
 * locale that couldn't represent all the characters, you'd just get a
 * static placeholder saying "[unconvertible log message]".  Then
 * Justin Erenkrantz pointed out how on platforms that didn't support
 * conversion at all, "svn log" would still fail completely when it
 * encountered unconvertible data.
 *
 * Now for both cases, the caller can at least fall back on this
 * function, which converts the message as best it can, substituting
 * "?\\XXX" escape codes for the non-ascii characters.
 *
 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
 * so when we can detect that at configure time, things will change.
 * Also, this should (?) be moved to apr/apu eventually.
 *
 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
 * details.
 */
const char *
svn_utf_cstring_from_utf8_fuzzy(const char *src,
                                apr_pool_t *pool);


/** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src;
 * allocate @a *dest in @a pool.
 */
svn_error_t *
svn_utf_cstring_from_utf8_stringbuf(const char **dest,
                                    const svn_stringbuf_t *src,
                                    apr_pool_t *pool);


/** Set @a *dest to a natively-encoded C string from utf8 string @a src;
 * allocate @a *dest in @a pool.
 */
svn_error_t *
svn_utf_cstring_from_utf8_string(const char **dest,
                                 const svn_string_t *src,
                                 apr_pool_t *pool);

/** Return the display width of UTF-8-encoded C string @a cstr.
 * If the string is not printable or invalid UTF-8, return -1.
 *
 * @since New in 1.8.
 */
int
svn_utf_cstring_utf8_width(const char *cstr);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* SVN_UTF_H */