diff --git a/README.md b/README.md index a191c506..43282e54 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,8 @@ C++ client for [ClickHouse](https://clickhouse.com/). * UUID * Map * Point, Ring, Polygon, MultiPolygon +* JSON - experimental support; requires output_format_native_write_json_as_string=1; data is passed as strings + ## Dependencies In the most basic case one needs only: diff --git a/clickhouse/CMakeLists.txt b/clickhouse/CMakeLists.txt index 0ab1a487..c759dfcb 100644 --- a/clickhouse/CMakeLists.txt +++ b/clickhouse/CMakeLists.txt @@ -16,6 +16,7 @@ SET ( clickhouse-cpp-lib-src columns/geo.cpp columns/ip4.cpp columns/ip6.cpp + columns/json.cpp columns/lowcardinality.cpp columns/nullable.cpp columns/numeric.cpp @@ -60,6 +61,7 @@ SET ( clickhouse-cpp-lib-src columns/geo.h columns/ip4.h columns/ip6.h + columns/json.h columns/itemview.h columns/lowcardinality.h columns/lowcardinalityadaptor.h @@ -221,6 +223,7 @@ INSTALL(FILES columns/factory.h DESTINATION include/clickhouse/columns/) INSTALL(FILES columns/geo.h DESTINATION include/clickhouse/columns/) INSTALL(FILES columns/ip4.h DESTINATION include/clickhouse/columns/) INSTALL(FILES columns/ip6.h DESTINATION include/clickhouse/columns/) +INSTALL(FILES columns/json.h DESTINATION include/clickhouse/columns/) INSTALL(FILES columns/itemview.h DESTINATION include/clickhouse/columns/) INSTALL(FILES columns/lowcardinality.h DESTINATION include/clickhouse/columns/) INSTALL(FILES columns/nothing.h DESTINATION include/clickhouse/columns/) diff --git a/clickhouse/client.h b/clickhouse/client.h index c2608ca3..17e1e404 100644 --- a/clickhouse/client.h +++ b/clickhouse/client.h @@ -10,6 +10,7 @@ #include "columns/geo.h" #include "columns/ip4.h" #include "columns/ip6.h" +#include "columns/json.h" #include "columns/lowcardinality.h" #include "columns/nothing.h" #include "columns/nullable.h" diff --git a/clickhouse/columns/factory.cpp b/clickhouse/columns/factory.cpp index 460d66fa..57220f0a 100644 --- 
a/clickhouse/columns/factory.cpp +++ b/clickhouse/columns/factory.cpp @@ -7,6 +7,7 @@ #include "geo.h" #include "ip4.h" #include "ip6.h" +#include "json.h" #include "lowcardinality.h" #include "lowcardinalityadaptor.h" #include "map.h" @@ -136,6 +137,8 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) { return nullptr; } return std::make_shared(GetASTChildElement(ast, 0).value); + case Type::JSON: + return std::make_shared(); default: return nullptr; } diff --git a/clickhouse/columns/itemview.cpp b/clickhouse/columns/itemview.cpp index 0116070a..610db030 100644 --- a/clickhouse/columns/itemview.cpp +++ b/clickhouse/columns/itemview.cpp @@ -72,6 +72,7 @@ void ItemView::ValidateData(Type::Code type, DataType data) { case Type::Code::String: case Type::Code::FixedString: + case Type::Code::JSON: // value can be of any size return; diff --git a/clickhouse/columns/json.cpp b/clickhouse/columns/json.cpp new file mode 100644 index 00000000..ec6995cf --- /dev/null +++ b/clickhouse/columns/json.cpp @@ -0,0 +1,102 @@ +#include "json.h" +#include "../base/wire_format.h" + +namespace clickhouse { + +enum class JSONSerializationVersion : uint64_t { + // String is the only currently supported serialization of JSON. 
+ // it should be enabled with output_format_native_write_json_as_string=1 + String = 1, +}; + +ColumnJSON::ColumnJSON() + : Column(Type::CreateJSON()) + , data_(std::make_shared()) +{} + +ColumnJSON::ColumnJSON(std::vector data) + : Column(Type::CreateJSON()) + , data_(std::make_shared(std::move(data))) +{} + +void ColumnJSON::Append(std::string_view str) { + data_->Append(str); +} + +void ColumnJSON::Append(const char* str) { + data_->Append(str); +} +void ColumnJSON::Append(std::string&& str) { + data_->Append(std::move(str)); +} + +std::string_view ColumnJSON::At(size_t n) const { + return data_->At(n); +} + +void ColumnJSON::Append(ColumnRef column) { + if (auto col = column->As()) { + data_->Append(col->data_); + } +} + +void ColumnJSON::Reserve(size_t new_cap) { + data_->Reserve(new_cap); +} + +bool ColumnJSON::LoadPrefix(InputStream* input, size_t) { + uint64_t v; + if (!WireFormat::ReadFixed(*input, &v)) { + return false; + } + if (v != static_cast(JSONSerializationVersion::String)) { + // Hard stop: the library can only parse JSON when `output_format_native_write_json_as_string` is enabled. + // Further processing is meaningless after this error and the user must be notified immediately. + throw ProtocolError("Unsupported JSON serialization version. 
" + "Make sure output_format_native_write_json_as_string=1 is set."); + } + return true; +} + +bool ColumnJSON::LoadBody(InputStream* input, size_t rows) { + return data_->LoadBody(input, rows); +} + +void ColumnJSON::SavePrefix(OutputStream* output) { + WireFormat::WriteFixed(*output, static_cast(JSONSerializationVersion::String)); +} + +void ColumnJSON::SaveBody(OutputStream* output) { + data_->SaveBody(output); +} + +void ColumnJSON::Clear() { + data_->Clear(); +} + +size_t ColumnJSON::Size() const { + return data_->Size(); +} + +ColumnRef ColumnJSON::Slice(size_t begin, size_t len) const { + auto ret = std::make_shared(); + auto sliced_data = data_->Slice(begin, len)->As(); + ret->data_->Swap(*sliced_data); + return ret; +} + +ColumnRef ColumnJSON::CloneEmpty() const +{ + return std::make_shared(); +} + +void ColumnJSON::Swap(Column& other) { + auto & col = dynamic_cast(other); + data_.swap(col.data_); +} + +ItemView ColumnJSON::GetItem(size_t index) const { + return ItemView{Type::JSON, data_->GetItem(index)}; +} + +} diff --git a/clickhouse/columns/json.h b/clickhouse/columns/json.h new file mode 100644 index 00000000..bc9a1ede --- /dev/null +++ b/clickhouse/columns/json.h @@ -0,0 +1,82 @@ +#pragma once + +#include "column.h" +#include "string.h" +#include "nullable.h" + +namespace clickhouse { + +/** + * JSON Column: Represents JSON values as strings. + * Works only when ClickHouse outputs JSON as strings and requires the setting + * output_format_native_write_json_as_string to be set to 1 for selecting data. + * Inserting JSON data does not require this setting. + * + * WARNING: THIS IS AN EXPERIMENTAL IMPLEMENTATION. + * The API may change in the future as we continue working on full support for JSON columns. + * + * ClickHouse does not accept empty strings as JSON; it requires an empty object ({}). + * For nullable columns, each row marked as NULL must contain {}. 
+ * For convenience `clickhouse::ColumnNullableT` automatically inserts {} for NULL rows. + */ +class ColumnJSON : public Column { +public: + + ColumnJSON(); + explicit ColumnJSON(std::vector data); + + /// Appends one element to the column. + void Append(std::string_view str); + + void Append(const char* str); + void Append(std::string&& str); + + std::string_view At(size_t n) const; + inline std::string_view operator [] (size_t n) const { return At(n); } + + /// Appends content of given column to the end of current one. + void Append(ColumnRef column) override; + + /// Increases the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + + /// Loads column prefix from input stream. + bool LoadPrefix(InputStream* input, size_t rows) override; + + /// Loads column data from input stream. + bool LoadBody(InputStream* input, size_t rows) override; + + /// Saves column prefix to output stream. Column types with prefixes must implement it. + void SavePrefix(OutputStream* output) override; + + /// Saves column data to output stream. + void SaveBody(OutputStream* output) override; + + /// Clears column data. + void Clear() override; + + /// Returns count of rows in the column. + size_t Size() const override; + + /// Makes slice of the current column. 
+ ColumnRef Slice(size_t begin, size_t len) const override; + ColumnRef CloneEmpty() const override; + void Swap(Column& other) override; + + ItemView GetItem(size_t index) const override; + +private: + std::shared_ptr data_; +}; + +template <> +inline void ColumnNullableT::Append(std::optional value) { + ColumnNullable::Append(!value.has_value()); + if (value.has_value()) { + typed_nested_data_->Append(*value); + } else { + typed_nested_data_->Append(std::string_view("{}")); + } +} + +} diff --git a/clickhouse/types/type_parser.cpp b/clickhouse/types/type_parser.cpp index d488a079..07ffab65 100644 --- a/clickhouse/types/type_parser.cpp +++ b/clickhouse/types/type_parser.cpp @@ -67,6 +67,7 @@ static const std::unordered_map kTypeCode = { { "MultiPolygon", Type::MultiPolygon }, { "Time", Type::Time }, { "Time64", Type::Time64 }, + { "JSON", Type::JSON }, }; template diff --git a/clickhouse/types/types.cpp b/clickhouse/types/types.cpp index a5588c68..db0f706a 100644 --- a/clickhouse/types/types.cpp +++ b/clickhouse/types/types.cpp @@ -54,6 +54,7 @@ const char* Type::TypeName(Type::Code code) { case Type::Code::MultiPolygon: return "MultiPolygon"; case Type::Code::Time: return "Time"; case Type::Code::Time64: return "Time64"; + case Type::Code::JSON: return "JSON"; } return "Unknown type"; @@ -85,6 +86,7 @@ std::string Type::GetName() const { case Ring: case Polygon: case MultiPolygon: + case JSON: return TypeName(code_); case Time64: return As()->GetName(); @@ -138,6 +140,7 @@ uint64_t Type::GetTypeUniqueId() const { case Float32: case Float64: case String: + case JSON: case IPv4: case IPv6: case Date: @@ -279,6 +282,10 @@ TypeRef Type::CreateMultiPolygon() { return TypeRef(new Type(Type::MultiPolygon)); } +TypeRef Type::CreateJSON() { + return TypeRef(new Type(Type::JSON)); +} + /// class ArrayType ArrayType::ArrayType(TypeRef item_type) : Type(Array), item_type_(item_type) { diff --git a/clickhouse/types/types.h b/clickhouse/types/types.h index 2275cfba..ebe60f2e 
100644 --- a/clickhouse/types/types.h +++ b/clickhouse/types/types.h @@ -59,6 +59,7 @@ class Type { MultiPolygon, Time, Time64, + JSON, }; using EnumItem = std::pair; @@ -148,6 +149,8 @@ class Type { static TypeRef CreateTime64(size_t precision); + static TypeRef CreateJSON(); + private: uint64_t GetTypeUniqueId() const; diff --git a/ut/Column_ut.cpp b/ut/Column_ut.cpp index f792caf3..78852831 100644 --- a/ut/Column_ut.cpp +++ b/ut/Column_ut.cpp @@ -195,6 +195,7 @@ using TestCases = ::testing::Types< GenericColumnTestCase, std::string, &MakeStrings>, GenericColumnTestCase, std::string, &MakeFixedStrings<12>>, + GenericColumnTestCase, std::string, &MakeJSONs>, GenericColumnTestCase, time_t, &MakeDates>, GenericColumnTestCase, time_t, &MakeDates>, diff --git a/ut/CreateColumnByType_ut.cpp b/ut/CreateColumnByType_ut.cpp index 556dfc36..c00e4bad 100644 --- a/ut/CreateColumnByType_ut.cpp +++ b/ut/CreateColumnByType_ut.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include diff --git a/ut/column_array_ut.cpp b/ut/column_array_ut.cpp index 6fe0bd19..2925eedf 100644 --- a/ut/column_array_ut.cpp +++ b/ut/column_array_ut.cpp @@ -227,6 +227,29 @@ TEST(ColumnArrayT, SimpleFixedString) { EXPECT_EQ("world\0"sv, (*array)[0][1]); } +TEST(ColumnArrayT, JSON) { + using namespace std::literals; + auto i1 = R"({"item": 1})"sv; + auto i2 = R"({"item": 2})"sv; + auto i3 = R"({"item": 3})"sv; + auto array = std::make_shared>(); + array->Append({i1}); + array->Append({i2, i3}); + + EXPECT_EQ(i1, array->At(0).At(0)); + EXPECT_EQ(i2, array->At(1).At(0)); + EXPECT_EQ(i3, array->At(1).At(1)); + + auto r1 = array->At(0); + EXPECT_EQ(1u, r1.Size()); + EXPECT_EQ(i1, r1.At(0)); + + auto r2 = array->At(1); + EXPECT_EQ(2u, r2.Size()); + EXPECT_EQ(i2, r2.At(0)); + EXPECT_EQ(i3, r2.At(1)); +} + TEST(ColumnArrayT, SimpleUInt64_2D) { // Nested 2D-arrays are supported too: auto array = std::make_shared>>(); diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index f799cb55..59cec98c 100644 --- 
a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -137,6 +137,30 @@ TEST(ColumnsCase, StringAppend) { ASSERT_EQ(col->At(2), "11"); } +TEST(ColumnsCase, JSONInit) { + auto values = MakeJSONs(); + auto col = std::make_shared(values); + + ASSERT_EQ(col->Size(), values.size()); + ASSERT_EQ(col->At(1), values[1]); + ASSERT_EQ(col->At(2), values[2]); + ASSERT_EQ(col->At(3), values[3]); +} + +TEST(ColumnsCase, JSONAppend) { + auto col = std::make_shared(); + const char* expected = "\"ufiudhf3493fyiudferyer3yrifhdflkdjfeuroe\""; + std::string data(expected); + col->Append(data); + col->Append(std::move(data)); + col->Append("11"); + + ASSERT_EQ(col->Size(), 3u); + ASSERT_EQ(col->At(0), expected); + ASSERT_EQ(col->At(1), expected); + ASSERT_EQ(col->At(2), "11"); +} + TEST(ColumnsCase, TupleAppend){ auto tuple1 = std::make_shared(std::vector({ std::make_shared(), diff --git a/ut/itemview_ut.cpp b/ut/itemview_ut.cpp index f53b54a9..a932a0b5 100644 --- a/ut/itemview_ut.cpp +++ b/ut/itemview_ut.cpp @@ -70,6 +70,8 @@ TEST(ItemView, StorableTypes) { TEST_ITEMVIEW_TYPE_VALUE(Type::Code::FixedString, std::string_view, ""); TEST_ITEMVIEW_TYPE_VALUE(Type::Code::FixedString, std::string_view, "here is a string"); + TEST_ITEMVIEW_TYPE_VALUE(Type::Code::JSON, std::string_view, "{}"); + TEST_ITEMVIEW_TYPE_VALUE(Type::Code::JSON, std::string_view, R"({"key": "value"})"); } #define EXPECT_ITEMVIEW_ERROR(TypeCode, NativeType) \ diff --git a/ut/roundtrip_column.cpp b/ut/roundtrip_column.cpp index b578d976..19b18bb0 100644 --- a/ut/roundtrip_column.cpp +++ b/ut/roundtrip_column.cpp @@ -47,7 +47,9 @@ ColumnRef RoundtripColumnValues(Client& client, ColumnRef expected) { client.Insert("temporary_roundtrip_table", block); } - client.Select("SELECT col FROM temporary_roundtrip_table ORDER BY id", [&result](const Block& b) { + std::string query = "SELECT col FROM temporary_roundtrip_table ORDER BY id " + "SETTINGS output_format_native_write_json_as_string=1"; + client.Select(query, [&result](const 
Block& b) { if (b.GetRowCount() == 0) return; diff --git a/ut/type_parser_ut.cpp b/ut/type_parser_ut.cpp index 4cff5237..09c715e2 100644 --- a/ut/type_parser_ut.cpp +++ b/ut/type_parser_ut.cpp @@ -24,6 +24,15 @@ TEST(TypeParserCase, ParseFixedString) { ASSERT_EQ(ast.elements.front().value, 24U); } +TEST(TypeParserCase, ParseJSON) { + TypeAst ast; + TypeParser("JSON").Parse(&ast); + + ASSERT_EQ(ast.meta, TypeAst::Terminal); + ASSERT_EQ(ast.name, "JSON"); + ASSERT_EQ(ast.code, Type::JSON); +} + TEST(TypeParserCase, ParseArray) { TypeAst ast; TypeParser("Array(Int32)").Parse(&ast); diff --git a/ut/types_ut.cpp b/ut/types_ut.cpp index 7af343b5..0345520e 100644 --- a/ut/types_ut.cpp +++ b/ut/types_ut.cpp @@ -113,7 +113,8 @@ TEST(TypesCase, IsEqual) { "Point", "Ring", "Polygon", - "MultiPolygon" + "MultiPolygon", + "JSON", }; // Check that Type::IsEqual returns true only if: diff --git a/ut/utils.cpp b/ut/utils.cpp index 5c0dec92..989f7a5c 100644 --- a/ut/utils.cpp +++ b/ut/utils.cpp @@ -166,6 +166,7 @@ std::ostream & printColumnValue(const ColumnRef& c, const size_t row, std::ostre const auto r = false || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) + || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) || doPrintValue(c, row, ostr) @@ -378,6 +379,7 @@ std::ostream& operator<<(std::ostream& ostr, const ItemView& item_view) { break; case Type::String: case Type::FixedString: + case Type::JSON: ostr << "\"" << item_view.data << "\" (" << item_view.data.size() << " bytes)"; break; case Type::Date: diff --git a/ut/utils_ut.cpp b/ut/utils_ut.cpp index 43d3cae8..e1de32fb 100644 --- a/ut/utils_ut.cpp +++ b/ut/utils_ut.cpp @@ -235,6 +235,7 @@ TEST(ItemView, OutputToOstream_VALID) { // Positive cases: output should be generated EXPECTED_SERIALIZATION("String : \"string\" (6 bytes)", ColumnString(), "string"); EXPECTED_SERIALIZATION("FixedString : \"string\" (6 bytes)", ColumnFixedString(6), "string"); + 
EXPECTED_SERIALIZATION(R"(JSON : "{"key": "value"}" (16 bytes))", ColumnJSON(), R"({"key": "value"})"); EXPECTED_SERIALIZATION("Int8 : -123", ColumnInt8(), -123); EXPECTED_SERIALIZATION("Int16 : -1234", ColumnInt16(), -1234); diff --git a/ut/value_generators.cpp b/ut/value_generators.cpp index 25b38f15..2c1215be 100644 --- a/ut/value_generators.cpp +++ b/ut/value_generators.cpp @@ -51,6 +51,15 @@ std::vector MakeStrings() { }; } +std::vector MakeJSONs() { + return { + R"({})", + R"({"a":"1"})", + R"({"age":"30","name":"Alice"})", + R"({"escapes":"line1\nline2\t\"quoted\"","unicode":"éèê"})", + }; +} + std::vector MakeUUIDs() { return { UUID(0llu, 0llu), diff --git a/ut/value_generators.h b/ut/value_generators.h index 889c74e0..3a47fb51 100644 --- a/ut/value_generators.h +++ b/ut/value_generators.h @@ -35,6 +35,7 @@ std::vector MakeNumbers(); std::vector MakeBools(); std::vector MakeFixedStrings(size_t string_size); std::vector MakeStrings(); +std::vector MakeJSONs(); std::vector MakeDateTime64s(size_t scale, size_t values_size = 200); std::vector MakeDates32(); std::vector MakeDateTimes();