pFad - Phone/Frame/Anonymizer/Declutterfier! Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/ww898/utf-cpp/commit/0c70ff950c9bbb1fa1b03fc665597adf6a72dfbc

Add C++17 features, add basic_string / basic_string_view support. · ww898/utf-cpp@0c70ff9 · GitHub
Skip to content

Commit 0c70ff9

Browse files
committed
Add C++17 features, add basic_string / basic_string_view support.
1 parent 2ee0b7c commit 0c70ff9

File tree

4 files changed

+206
-42
lines changed

4 files changed

+206
-42
lines changed

README.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,13 @@ std::vector<char> u8;
3939
convz<utf32, utf8>(u32.data(), std::back_inserter(u8));
4040
std::wstring uw;
4141
conv<utf8, utfw>(u8s, u8s + sizeof(u8s), std::back_inserter(uw));
42-
static_assert(is_utf_same<decltype(*u8s), decltype(u8)::value_type>::value, "Fail");
43-
static_assert(1 ==
44-
(is_utf_same<decltype(u16)::value_type, decltype(uw)::value_type>::value ? 1 : 0) +
45-
(is_utf_same<decltype(u32)::value_type, decltype(uw)::value_type>::value ? 1 : 0), "Fail");
42+
auto u8r = conv<char>(uw);
43+
auto uwr = convz<wchar_t>(u8s);
44+
auto u32r = conv<char32_t>(std::string_view(u8r.data(), u8r.size())); // C++17 only
45+
static_assert(is_utf_same_v<decltype(*u8s), decltype(u8)::value_type>, "Fail"); // C++17 only
46+
static_assert(
47+
is_utf_same<decltype(u16)::value_type, decltype(uw)::value_type>::value !=
48+
is_utf_same<decltype(u32)::value_type, decltype(uw)::value_type>::value, "Fail");
4649
```
4750
4851
## Performance

include/ww898/utf_converters.hpp

Lines changed: 107 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,30 @@
2424

2525
#pragma once
2626

27+
#if !defined(__cpp_lib_string_view)
28+
#if defined(_MSVC_LANG)
29+
#define __cpp_lib_string_view _MSVC_LANG
30+
#else
31+
#define __cpp_lib_string_view __cplusplus
32+
#endif
33+
#endif
34+
35+
#if !defined(__cpp_constexpr)
36+
#if defined(_MSVC_LANG)
37+
#define __cpp_constexpr _MSVC_LANG
38+
#else
39+
#define __cpp_constexpr __cplusplus
40+
#endif
41+
#endif
42+
2743
#include <cstdint>
2844
#include <stdexcept>
2945
#include <iterator>
46+
#include <string>
47+
48+
#if __cpp_lib_string_view >= 201606
49+
#include <string_view>
50+
#endif
3051

3152
namespace ww898 {
3253
namespace utf {
@@ -287,6 +308,55 @@ struct utf32 final
287308
}
288309
};
289310

311+
namespace detail {
312+
313+
template<
314+
size_t wchar_size>
315+
struct wchar_selector {};
316+
317+
template<> struct wchar_selector<2> { typedef utf16 type; };
318+
template<> struct wchar_selector<4> { typedef utf32 type; };
319+
320+
}
321+
322+
typedef detail::wchar_selector<sizeof(wchar_t)>::type utfw;
323+
324+
namespace detail {
325+
326+
template<
327+
typename Ch>
328+
struct utf_selector {};
329+
330+
template<> struct utf_selector< char> { typedef utf8 type; };
331+
template<> struct utf_selector<unsigned char> { typedef utf8 type; };
332+
template<> struct utf_selector<signed char> { typedef utf8 type; };
333+
template<> struct utf_selector<char16_t > { typedef utf16 type; };
334+
template<> struct utf_selector<char32_t > { typedef utf32 type; };
335+
template<> struct utf_selector<wchar_t > { typedef utfw type; };
336+
337+
}
338+
339+
template<
340+
typename Ch>
341+
using utf_selector = detail::utf_selector<typename std::decay<Ch>::type>;
342+
343+
template<
344+
typename Ch>
345+
using utf_selector_t = typename utf_selector<Ch>::type;
346+
347+
template<
348+
typename Ch1,
349+
typename Ch2>
350+
using is_utf_same = std::is_same<utf_selector_t<Ch1>, utf_selector_t<Ch2>>;
351+
352+
#if __cpp_constexpr >= 201603
353+
template<
354+
typename Ch1,
355+
typename Ch2>
356+
inline constexpr bool is_utf_same_v = is_utf_same<Ch1, Ch2>::value;
357+
#endif
358+
359+
290360
template<
291361
typename Utf,
292362
typename It>
@@ -473,45 +543,53 @@ Oit conv(It && it, Eit && eit, Oit && oit)
473543
std::forward<Oit>(oit));
474544
}
475545

476-
namespace detail {
477-
478546
template<
479-
size_t wchar_size>
480-
struct wchar_selector {};
481-
482-
template<> struct wchar_selector<2> { typedef utf16 type; };
483-
template<> struct wchar_selector<4> { typedef utf32 type; };
484-
547+
typename Outf,
548+
typename Ch,
549+
typename Oit>
550+
Oit convz(Ch const * const str, Oit && oit)
551+
{
552+
return convz<utf_selector_t<Ch>, Outf>(str, std::forward<Oit>(oit));
485553
}
486554

487-
typedef detail::wchar_selector<sizeof(wchar_t)>::type utfw;
488-
489-
namespace detail {
490-
491555
template<
492-
typename Ch>
493-
struct utf_selector {};
494-
495-
template<> struct utf_selector< char> { typedef utf8 type; };
496-
template<> struct utf_selector<unsigned char> { typedef utf8 type; };
497-
template<> struct utf_selector<signed char> { typedef utf8 type; };
498-
template<> struct utf_selector<char16_t > { typedef utf16 type; };
499-
template<> struct utf_selector<char32_t > { typedef utf32 type; };
500-
template<> struct utf_selector<wchar_t > { typedef utfw type; };
501-
556+
typename Och,
557+
typename Str>
558+
std::basic_string<Och> convz(Str && str)
559+
{
560+
std::basic_string<Och> res;
561+
convz<utf_selector_t<Och>>(std::forward<Str>(str), std::back_inserter(res));
562+
return res;
502563
}
503564

504565
template<
505-
typename Ch>
506-
using utf_selector = detail::utf_selector<typename std::decay<Ch>::type>;
566+
typename Outf,
567+
typename Ch,
568+
typename Oit>
569+
Oit conv(std::basic_string<Ch> const & str, Oit && oit)
570+
{
571+
return conv<utf_selector_t<Ch>, Outf>(str.cbegin(), str.cend(), std::forward<Oit>(oit));
572+
}
507573

574+
#if __cpp_lib_string_view >= 201606
508575
template<
509-
typename Ch>
510-
using utf_selector_t = typename utf_selector<Ch>::type;
576+
typename Outf,
577+
typename Ch,
578+
typename Oit>
579+
Oit conv(std::basic_string_view<Ch> const & str, Oit && oit)
580+
{
581+
return conv<utf_selector_t<Ch>, Outf>(str.cbegin(), str.cend(), std::forward<Oit>(oit));
582+
}
583+
#endif
511584

512585
template<
513-
typename Ch1,
514-
typename Ch2>
515-
using is_utf_same = std::is_same<utf_selector_t<Ch1>, utf_selector_t<Ch2>>;
586+
typename Och,
587+
typename Str>
588+
std::basic_string<Och> conv(Str && str)
589+
{
590+
std::basic_string<Och> res;
591+
conv<utf_selector_t<Och>>(std::forward<Str>(str), std::back_inserter(res));
592+
return res;
593+
}
516594

517595
}}

test/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,16 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
3434
"$<$<CONFIG:Release>:/Ot>"
3535
"$<$<CONFIG:Release>:/Oi>"
3636
"$<$<CONFIG:Release>:/Oy->")
37+
38+
if(MSVC_VERSION MATCHES "^191[0-9]$")
39+
target_compile_options(utf-cpp-test PRIVATE /std:c++17)
40+
elseif(MSVC_VERSION STREQUAL 1900)
41+
target_compile_options(utf-cpp-test PRIVATE /std:c++14)
42+
elseif(MSVC_VERSION STREQUAL 1800)
43+
else()
44+
message(FATAL_ERROR "Unknown Microsoft Visual C++ compiler version ${MSVC_VERSION}")
45+
endif()
46+
3747
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
3848
target_compile_options(utf-cpp-test PRIVATE -std=c++11 -Wall -Wextra)
3949
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")

test/utf_converters_test.cpp

Lines changed: 82 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
* SOFTWARE.
2323
*/
2424

25+
#if defined(_WIN32)
26+
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
27+
#endif
28+
2529
#include <ww898/utf_converters.hpp>
2630

2731
#if defined(_WIN32)
@@ -46,7 +50,7 @@
4650
#if defined(__linux__) || defined(__APPLE__)
4751
#include <chrono>
4852

49-
#ifdef __MACH__
53+
#if defined(__MACH__)
5054
#include <mach/clock.h>
5155
#include <mach/mach.h>
5256
#endif
@@ -385,10 +389,12 @@ template<
385389
typename Ch>
386390
struct utf_namer {};
387391

388-
template<> struct utf_namer<char > { static char const value[]; };
389-
template<> struct utf_namer<char16_t> { static char const value[]; };
390-
template<> struct utf_namer<char32_t> { static char const value[]; };
391-
template<> struct utf_namer<wchar_t > { static char const value[]; };
392+
template<> struct utf_namer< char> { static char const value[]; };
393+
template<> struct utf_namer<unsigned char> : utf_namer<char> {};
394+
template<> struct utf_namer<signed char> : utf_namer<char> {};
395+
template<> struct utf_namer<char16_t > { static char const value[]; };
396+
template<> struct utf_namer<char32_t > { static char const value[]; };
397+
template<> struct utf_namer<wchar_t > { static char const value[]; };
392398

393399
char const utf_namer<char >::value[] = "UTF8";
394400
char const utf_namer<char16_t>::value[] = "UTF16";
@@ -516,6 +522,57 @@ BOOST_STATIC_ASSERT( utf::is_utf_same<wchar_t, char32_t>::value);
516522
#error Unknown platform
517523
#endif
518524

525+
#if __cpp_constexpr >= 201603
526+
527+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<unsigned char, char>);
528+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<unsigned char, unsigned char>);
529+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<unsigned char, signed char>);
530+
531+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<signed char, char>);
532+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<signed char, unsigned char>);
533+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<signed char, signed char>);
534+
535+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<char, char>);
536+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<char, unsigned char>);
537+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<char, signed char>);
538+
539+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<char16_t, char16_t>);
540+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<char32_t, char32_t>);
541+
BOOST_STATIC_ASSERT(utf::is_utf_same_v<wchar_t , wchar_t >);
542+
543+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<char, char16_t>);
544+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<char, char32_t>);
545+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<char, wchar_t >);
546+
547+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<char16_t, char>);
548+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<char32_t, char>);
549+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<wchar_t , char>);
550+
551+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<char16_t, char32_t>);
552+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<char32_t, char16_t>);
553+
554+
#if defined(_WIN32)
555+
556+
BOOST_STATIC_ASSERT( utf::is_utf_same_v<char16_t, wchar_t>);
557+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<char32_t, wchar_t>);
558+
559+
BOOST_STATIC_ASSERT( utf::is_utf_same_v<wchar_t, char16_t>);
560+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<wchar_t, char32_t>);
561+
562+
#elif defined(__linux__) || defined(__APPLE__)
563+
564+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<char16_t, wchar_t>);
565+
BOOST_STATIC_ASSERT( utf::is_utf_same_v<char32_t, wchar_t>);
566+
567+
BOOST_STATIC_ASSERT(!utf::is_utf_same_v<wchar_t, char16_t>);
568+
BOOST_STATIC_ASSERT( utf::is_utf_same_v<wchar_t, char32_t>);
569+
570+
#else
571+
#error Unknown platform
572+
#endif
573+
574+
#endif
575+
519576
namespace {
520577

521578
uint64_t get_time() throw()
@@ -560,7 +617,7 @@ uint64_t get_time() throw()
560617
#if defined(__linux__) || defined(__APPLE__)
561618
void current_utc_time(timespec * ts)
562619
{
563-
#ifdef __MACH__ // OS X does not have clock_gettime, use clock_get_time
620+
#if defined(__MACH__) // OS X does not have clock_gettime, use clock_get_time
564621
clock_serv_t cclock;
565622
mach_timespec_t mts;
566623
if (host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock) != KERN_SUCCESS)
@@ -727,6 +784,8 @@ BOOST_AUTO_TEST_CASE(performance, WW898_PERFORMANCE_TESTS_MODE)
727784
}
728785

729786
std::cout <<
787+
"__cpp_lib_string_view: " << std::dec << __cpp_lib_string_view << std::endl <<
788+
"__cpp_constexpr : " << std::dec << __cpp_constexpr << std::endl <<
730789
"sizeof wchar_t: " << sizeof(wchar_t) << std::endl <<
731790
utf_namer<wchar_t >::value << ": UTF" << 8 * sizeof(wchar_t) << std::endl;
732791

@@ -853,10 +912,24 @@ BOOST_AUTO_TEST_CASE(example, WW898_PERFORMANCE_TESTS_MODE)
853912
convz<utf32, utf8>(u32.data(), std::back_inserter(u8));
854913
std::wstring uw;
855914
conv<utf8, utfw>(u8s, u8s + sizeof(u8s), std::back_inserter(uw));
915+
auto u8r = conv<char>(uw);
916+
auto uwr = convz<wchar_t>(u8s);
917+
918+
#if __cpp_lib_string_view >= 201606
919+
auto u32r = conv<char32_t>(std::string_view(u8r.data(), u8r.size())); // C++17 only
920+
#endif
921+
856922
static_assert(is_utf_same<decltype(*u8s), decltype(u8)::value_type>::value, "Fail");
857-
static_assert(1 ==
858-
(is_utf_same<decltype(u16)::value_type, decltype(uw)::value_type>::value ? 1 : 0) +
859-
(is_utf_same<decltype(u32)::value_type, decltype(uw)::value_type>::value ? 1 : 0), "Fail");
923+
static_assert(
924+
is_utf_same<decltype(u16)::value_type, decltype(uw)::value_type>::value !=
925+
is_utf_same<decltype(u32)::value_type, decltype(uw)::value_type>::value, "Fail");
926+
927+
#if __cpp_constexpr >= 201603
928+
static_assert(is_utf_same_v<decltype(*u8s), decltype(u8)::value_type>, "Fail"); // C++17 only
929+
static_assert(
930+
is_utf_same_v<decltype(u16)::value_type, decltype(uw)::value_type> !=
931+
is_utf_same_v<decltype(u32)::value_type, decltype(uw)::value_type>, "Fail"); // C++17 only
932+
#endif
860933
}
861934

862935
#undef WW898_PERFORMANCE_TESTS_MODE

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad © 2024 Your Company Name. All rights reserved.





Check this box to remove all script contents from the fetched content.



Check this box to remove all images from the fetched content.


Check this box to remove all CSS styles from the fetched content.


Check this box to keep images at their original size and original (less efficient) compression.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy