utils/cdx-dump/main.cpp (374 lines of code) (raw):
/****************************************************************************
* Copyright (C) from 2024 to Present EPAM Systems.
*
* This file is part of Indigo toolkit.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
#include "base_cpp/scanner.h"
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <molecule/CDXCommons.h>
#include <molecule/molecule_json_saver.h>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <vector>
using namespace indigo;
// Formats a value as a lower-case hexadecimal literal: "0x" followed by the
// value zero-padded to two digits per byte of T (a uint16_t 0x1a gives "0x001a").
//
// One-byte integer types are promoted before streaming; iostreams would
// otherwise print them as a raw character instead of as hex digits.
//
// @tparam T  type of the value; any type that can be streamed with std::hex.
// @param val value to format.
// @return    the formatted string.
template <typename T>
std::string toHex(T val)
{
    std::ostringstream oss;
    // setw only applies to the next insertion, i.e. to the value itself.
    oss << "0x" << std::setw(sizeof(T) * 2) << std::setfill('0') << std::hex;
    if constexpr (std::is_integral_v<T> && sizeof(T) == 1)
    {
        // Go through unsigned char first so negative int8_t values are not sign-extended.
        oss << static_cast<unsigned int>(static_cast<unsigned char>(val));
    }
    else
    {
        oss << val;
    }
    return oss.str();
}
template std::string toHex<>(uint16_t);
template std::string toHex<>(uint32_t);
// Renders the first `count` bytes of `arr` as two-digit lower-case hex pairs
// separated by single spaces, e.g. "00 1a ff". Returns an empty string when
// `count` is not positive.
std::string toHex(Array<byte>& arr, int count)
{
    std::ostringstream out;
    // Fill character and numeric base stay in effect; only the width has to be
    // set again for every byte.
    out << std::setfill('0') << std::hex;
    const char* separator = "";
    for (int idx = 0; idx < count; ++idx)
    {
        out << separator << std::setw(2) << static_cast<unsigned short>(arr[idx]);
        separator = " ";
    }
    return out.str();
}
// Writes `val` to `json` in its toHex() textual form, handing the text to
// JsonWriter::RawNumber.
template <typename T>
void save_hex(indigo::JsonWriter& json, T val)
{
    const std::string text = toHex(val);
    const auto text_length = static_cast<rapidjson::SizeType>(text.size());
    json.RawNumber(text.c_str(), text_length);
}
// Converts a 16.16 fixed-point coordinate, given as its two 16-bit halves,
// to a decimal string (std::to_string formatting, six fractional digits).
//
// The coordinate is a signed 32-bit quantity, so the high word is read as a
// two's-complement integer part; treating it as unsigned would print negative
// coordinates as large positive numbers (-0.5 would come out as 65535.5).
//
// @param lo low 16 bits: the fraction, in units of 1/65536.
// @param hi high 16 bits: the signed integer part.
// @return   the coordinate value as text.
std::string coordToStr(uint16_t lo, uint16_t hi)
{
    // Sign-extend by hand to stay clear of implementation-defined narrowing.
    const double whole = (hi >= 0x8000) ? static_cast<double>(hi) - 65536.0 : static_cast<double>(hi);
    const double fraction = lo / 65536.0;
    return std::to_string(whole + fraction);
}
// Consumes one value of type T from the front of a byte buffer.
//
// On success `ptr` is advanced and `size` reduced by sizeof(T). When `ptr` is
// null or fewer than sizeof(T) bytes remain, nothing is consumed and a
// value-initialised T is returned; previously this case read past the buffer
// and wrapped `size` around.
//
// @tparam T   trivially copyable type to extract.
// @param ptr  cursor into the buffer; advanced on success.
// @param size number of bytes left at `ptr`; reduced on success.
// @return     the extracted value, or T{} if not enough data is left.
template <typename T>
static T read(byte*& ptr, uint32_t& size)
{
    static_assert(std::is_trivially_copyable_v<T>, "read() copies raw bytes into T");
    T res{};
    if (ptr == nullptr || size < sizeof(T))
        return res;
    // memcpy avoids unaligned and aliasing-violating access to the byte buffer.
    std::memcpy(&res, ptr, sizeof(T));
    ptr += sizeof(T);
    size -= static_cast<uint32_t>(sizeof(T));
    return res;
}
template uint16_t read(byte*&, uint32_t&);
// Emits one CDX property as a JSON object.
//
// The object always carries "tag" (hex text), "len" (payload size in bytes)
// and "hex" (space-separated payload bytes). If the tag is listed in
// KCDXPropToName, "prop_name" is added and the payload is decoded according to
// the type stored in that table. Tags 0x1500 and 0x1501 are decoded as strings
// even when the table has no entry for them.
//
// Decoded fields are skipped when the payload is too short for the declared
// type, so a truncated or malformed property still yields its raw "hex" dump
// instead of reading outside the buffer.
//
// @param tag           property tag.
// @param len           payload length in bytes.
// @param buf           payload bytes.
// @param json          writer receiving the output.
// @param is_object_tag true when the property belongs to a kCDXObj_ObjectTag
//                      object; its kCDXProp_Name string then has no style-run
//                      header to skip.
static void saveProperty(uint16_t tag, uint32_t len, Array<byte>& buf, indigo::JsonWriter& json, bool is_object_tag = false)
{
    json.StartObject();
    json.Key("tag");
    save_hex(json, tag);
    json.Key("len");
    json.Uint(len);
    json.Key("hex");
    json.String(toHex(buf, len).c_str());

    uint16_t* p16 = reinterpret_cast<uint16_t*>(buf.ptr());
    // True when the payload holds at least `words` 16-bit values.
    const auto hasWords = [len](uint32_t words) { return len >= words * sizeof(uint16_t); };

    auto it = KCDXPropToName.find(tag);
    if (it != KCDXPropToName.end() || tag == 0x1500 || tag == 0x1501)
    {
        ECDXType type = ECDXType::CDXString;
        if (it != KCDXPropToName.end())
        {
            json.Key("prop_name");
            json.String(it->second.first.c_str());
            type = it->second.second;
        }
        switch (type)
        {
        case ECDXType::CDXCoordinate: {
            if (!hasWords(2))
                break;
            json.Key("coord");
            json.String(coordToStr(p16[0], p16[1]).c_str());
            break;
        }
        case ECDXType::CDXPoint2D: {
            if (!hasWords(4))
                break;
            // y is stored before x in the payload.
            json.Key("x");
            json.String(coordToStr(p16[2], p16[3]).c_str());
            json.Key("y");
            json.String(coordToStr(p16[0], p16[1]).c_str());
            break;
        }
        case ECDXType::CDXPoint3D: {
            if (!hasWords(6))
                break;
            // Stored in z, y, x order.
            json.Key("x");
            json.String(coordToStr(p16[4], p16[5]).c_str());
            json.Key("y");
            json.String(coordToStr(p16[2], p16[3]).c_str());
            json.Key("z");
            json.String(coordToStr(p16[0], p16[1]).c_str());
            break;
        }
        case ECDXType::CDXRectangle: {
            if (!hasWords(8))
                break;
            json.Key("top");
            json.String(coordToStr(p16[0], p16[1]).c_str());
            json.Key("left");
            json.String(coordToStr(p16[2], p16[3]).c_str());
            json.Key("bottom");
            json.String(coordToStr(p16[4], p16[5]).c_str());
            json.Key("right");
            json.String(coordToStr(p16[6], p16[7]).c_str());
            break;
        }
        case ECDXType::CDXUINT8:
            if (len < 1)
                break;
            json.Key("val");
            json.Uint(buf[0]);
            break;
        case ECDXType::CDXUINT16:
            if (!hasWords(1))
                break;
            json.Key("val");
            json.Uint(*p16);
            break;
        case ECDXType::CDXINT8:
            if (len < 1)
                break;
            json.Key("val");
            json.Int(*reinterpret_cast<int8_t*>(buf.ptr()));
            break;
        case ECDXType::CDXINT16:
            if (!hasWords(1))
                break;
            json.Key("val");
            json.Int(*reinterpret_cast<int16_t*>(buf.ptr()));
            break;
        case ECDXType::CDXObjectID: {
            // The payload is a sequence of 32-bit ids; trailing partial ids are ignored.
            json.Key("ids");
            std::string ids;
            uint32_t* p32 = reinterpret_cast<uint32_t*>(buf.ptr());
            const uint32_t id_count = static_cast<uint32_t>(len / sizeof(uint32_t));
            for (uint32_t i = 0; i < id_count; i++)
            {
                if (!ids.empty())
                    ids += " ";
                ids += toHex(p32[i]);
            }
            json.String(ids.c_str());
            break;
        }
        case ECDXType::CDXString: {
            byte* ptr = buf.ptr();
            // Strings normally start with a 16-bit style-run count followed by the
            // runs. The count is only looked at when there are two bytes to read.
            if (!(tag == kCDXProp_Name && is_object_tag) && len >= sizeof(uint16_t))
            {
                auto style_runs = read<uint16_t>(ptr, len);
                if (style_runs * sizeof(CDXTextStyle) > len)
                {
                    // The count cannot be real: treat the whole payload as text
                    // and put the two consumed bytes back.
                    ptr -= sizeof(uint16_t);
                    len += sizeof(uint16_t);
                }
                else if (style_runs > 0)
                {
                    // NOTE(review): each run is read as five 16-bit fields; this
                    // assumes sizeof(CDXTextStyle) covers those 10 bytes - confirm.
                    json.Key("style_runs");
                    json.StartArray();
                    for (int i = 0; i < style_runs; i++)
                    {
                        json.StartObject();
                        json.Key("start");
                        json.Uint(read<uint16_t>(ptr, len));
                        json.Key("font");
                        json.Uint(read<uint16_t>(ptr, len));
                        json.Key("style");
                        save_hex(json, read<uint16_t>(ptr, len));
                        json.Key("size");
                        json.Double(read<uint16_t>(ptr, len) / 20.0);
                        json.Key("color");
                        json.Uint(read<uint16_t>(ptr, len));
                        json.EndObject();
                    }
                    json.EndArray();
                }
            }
            json.Key("str");
            // An empty payload leaves ptr null; never hand that to the writer.
            if (ptr != nullptr && len > 0)
                json.String(reinterpret_cast<char*>(ptr), len);
            else
                json.String("");
            break;
        }
        default:
            break;
        }
    }
    json.EndObject();
}
void readProperty(uint16_t tag, Scanner& scan, indigo::JsonWriter& json, bool is_object_tag = false)
{
int len = 0;
uint16_t size;
scan.read(2, &size);
len = size;
Array<byte> buf;
if (len > 0)
{
if (UINT16_MAX == len)
{
scan.read(4, &len);
}
buf.expandFill(len, 0);
scan.read(len, buf.ptr());
}
saveProperty(tag, len, buf, json, is_object_tag);
}
static void readObject(uint16_t tag, Scanner& scan, indigo::JsonWriter& json);
// Dispatches on the tag value: tags below kCDXTag_Object are properties,
// everything else is a (possibly nested) object.
//
// @param is_object_tag only relevant for properties; forwarded to readProperty().
static void readObjOrProp(uint16_t tag, Scanner& scan, indigo::JsonWriter& json, bool is_object_tag = false)
{
    const bool is_property = tag < kCDXTag_Object;
    if (is_property)
    {
        readProperty(tag, scan, json, is_object_tag);
        return;
    }
    readObject(tag, scan, json);
}
// Reads one CDX object from `scan` and emits it as a JSON object with the
// keys "tag", optionally "obj_name" (when the tag is listed in KCDXObjToName),
// "id" and "content".
//
// "content" is an array of the nested objects and properties, read until a
// zero tag terminates the object.
//
// @param object_tag tag of the object; already consumed by the caller.
// @param scan       input positioned at the object's 4-byte id.
// @param json       writer receiving the output.
static void readObject(uint16_t object_tag, Scanner& scan, indigo::JsonWriter& json)
{
    json.StartObject();

    json.Key("tag");
    save_hex(json, object_tag);

    const auto name_entry = KCDXObjToName.find(object_tag);
    if (name_entry != KCDXObjToName.end())
    {
        json.Key("obj_name");
        json.String(name_entry->second.c_str());
    }

    uint32_t object_id;
    scan.read(4, &object_id);
    json.Key("id");
    save_hex(json, object_id);

    json.Key("content");
    json.StartArray();
    // Children need to know whether their parent is an ObjectTag object.
    const bool parent_is_object_tag = (object_tag == kCDXObj_ObjectTag);
    for (;;)
    {
        uint16_t child_tag;
        scan.read(2, &child_tag);
        if (child_tag == 0)
            break; // end-of-object marker
        readObjOrProp(child_tag, scan, json, parent_is_object_tag);
    }
    json.EndArray();

    json.EndObject();
}
// Prints the command-line synopsis to stdout. The text ends with a newline so
// the shell prompt (or any following output) does not continue on the last
// usage line.
void print_usage()
{
    printf("Usage: cdx-dump [-p] file.cdx\n"
           "cdx-dump [-r] file.json file.cdx\n"
           "-p for pretty json\n"
           "-r for reverse mode - from json to cdx\n");
}
void parse_cdx(const char* filename, bool pretty_json)
{
FileScanner sc(filename);
// Skip header
sc.seek(22, SEEK_CUR); // VcjD0100 + 0x01020304 + 10 zero bytes
rapidjson::StringBuffer s;
indigo::JsonWriter json(pretty_json);
json.Reset(s);
uint16_t tag;
json.StartArray();
bool first = true;
while (!sc.isEOF())
{
sc.read(sizeof(tag), &tag);
if (first) // marvin and ketcher write 0x0000 instead of 0x8000 for first document
{
if (tag == 0)
tag = kCDXObj_Document;
first = false;
}
if (tag == 0)
break; // end of file
readObjOrProp(tag, sc, json);
}
json.EndArray();
printf("\n%s\n", s.GetString());
}
// Appends the in-memory bytes of `val` (sizeof(T) of them, host byte order)
// to the output stream.
template <typename T>
void write(std::ofstream& ofs, T val)
{
    const char* raw_bytes = reinterpret_cast<const char*>(&val);
    ofs.write(raw_bytes, sizeof(val));
}
// Serialises an array of dumped JSON nodes back into CDX binary form.
//
// Every node with a "tag" member is written. Tags at or above kCDXTag_Object
// are objects: tag, 4-byte "id", the recursively written "content" array (if
// present) and a terminating zero tag. Lower tags are properties: tag, length
// taken from "len" (written as 0xFFFF plus a 4-byte length when it does not
// fit below 0xFFFF), then the bytes listed in "hex".
//
// Numeric text is parsed with std::stoull so that ids with the top bit set
// (e.g. "0xffffffff") do not throw on platforms where long is 32 bits wide.
//
// @param cdx   binary output stream.
// @param nodes JSON array of nodes as produced by the dump mode.
void save_nodes(std::ofstream& cdx, rapidjson::Value& nodes)
{
    for (rapidjson::SizeType node_idx = 0; node_idx < nodes.Size(); ++node_idx)
    {
        auto& node = nodes[node_idx];
        // Anything that is not a tagged object cannot be represented in CDX.
        if (!node.IsObject() || !node.HasMember("tag"))
            continue;

        // Base 0 lets stoull honour the "0x" prefix written by the dump mode.
        const auto tag = static_cast<uint16_t>(std::stoull(node["tag"].GetString(), nullptr, 0) & UINT16_MAX);
        write(cdx, tag);
        if (tag >= kCDXTag_Object)
        {
            const auto id = static_cast<uint32_t>(std::stoull(node["id"].GetString(), nullptr, 0) & UINT32_MAX);
            write(cdx, id);
            if (node.HasMember("content"))
            {
                save_nodes(cdx, node["content"]);
            }
            write<uint16_t>(cdx, 0); // end-of-object marker
        }
        else // save property
        {
            const uint32_t llen = node["len"].GetUint();
            if (llen >= UINT16_MAX)
            {
                // Long form: 0xFFFF marker followed by the real 4-byte length.
                write<uint16_t>(cdx, UINT16_MAX);
                write(cdx, llen);
            }
            else
            {
                write(cdx, static_cast<uint16_t>(llen));
            }
            std::istringstream hex(node["hex"].GetString());
            std::string sbyte;
            while (getline(hex, sbyte, ' '))
            {
                // Tolerate doubled separators instead of failing in stoi.
                if (sbyte.empty())
                    continue;
                const auto b = static_cast<uint8_t>(std::stoi(sbyte, nullptr, 16) & UINT8_MAX);
                write(cdx, b);
            }
        }
    }
}
// Reverse mode: converts a JSON dump back into a CDX file.
//
// Writes the CDX header (signature, magic number, reserved bytes), then the
// nodes of the top-level JSON array via save_nodes(), then a terminating zero
// tag. Problems (unreadable input, JSON parse error, unexpected top-level
// value, unwritable output) are reported on stdout and nothing further is
// written.
//
// @param json_file_name path of the JSON dump to read.
// @param cdx_filename   path of the CDX file to create.
void json_to_cdx(const char* json_file_name, const char* cdx_filename)
{
    std::ifstream json_file(json_file_name);
    if (!json_file)
    {
        // Without this check a missing file surfaces as a misleading parse error.
        printf("Can't open input file %s.\n", json_file_name);
        return;
    }
    std::stringstream json;
    json << json_file.rdbuf();

    rapidjson::Document data;
    if (data.Parse(json.str().c_str()).HasParseError())
    {
        printf("Parse error %d at offset %zu.\n", static_cast<int>(data.GetParseError()), data.GetErrorOffset());
        return;
    }
    if (!data.IsArray())
    {
        // save_nodes() iterates the document as an array.
        printf("Unexpected json content: top-level array expected.\n");
        return;
    }

    std::ofstream cdx(cdx_filename, std::ios::binary);
    if (!cdx)
    {
        printf("Can't open output file %s.\n", cdx_filename);
        return;
    }
    cdx << kCDX_HeaderString;
    write(cdx, kCDXMagicNumber);
    cdx.write(kCDXReserved, sizeof(kCDXReserved));
    save_nodes(cdx, data);
    write<uint16_t>(cdx, 0); // end-of-file marker
    cdx.close();
}
// Entry point.
//
//   cdx-dump [-p] file.cdx            dump a CDX file as JSON (-p: pretty print)
//   cdx-dump -r file.json file.cdx    rebuild a CDX file from a JSON dump
//
// Arguments other than -p and -r are collected as file names in order.
// The usage paths that already existed keep returning 0; the missing-output
// check in reverse mode returns 1 (that case used to index past the end of
// the file list).
int main(int argc, char* argv[])
{
    std::vector<std::string_view> input_files;
    // Skip argv[0] when present; an empty argv must not produce an inverted range.
    const std::vector<std::string_view> args(argv + (argc > 0 ? 1 : 0), argv + argc);
    bool pretty_json = false;
    bool reverse = false;
    for (const auto& arg : args)
    {
        if (arg == "-p")
        {
            pretty_json = true;
            continue;
        }
        if (arg == "-r")
        {
            reverse = true;
            continue;
        }
        input_files.push_back(arg);
    }
    if (input_files.empty())
    {
        print_usage();
        return 0;
    }
    if (pretty_json && reverse)
    {
        printf("-p has no sense in reverse mode.\n");
        print_usage();
        return 0;
    }
    // The views point into argv, so data() is null-terminated.
    if (reverse)
    {
        if (input_files.size() < 2)
        {
            printf("Reverse mode needs both a json input file and a cdx output file.\n");
            print_usage();
            return 1;
        }
        json_to_cdx(input_files[0].data(), input_files[1].data());
    }
    else
    {
        parse_cdx(input_files[0].data(), pretty_json);
    }
    return 0;
}