Search code examples
azure azure-storage azure-sdk

400 XML specified is not syntactically valid


I am using azure-sdk-for-cpp and I get the error "400 XML specified is not syntactically valid" when calling CommitBlockList. Is there a way to see what is wrong with the XML? I have tried reading the logs, but they do not contain the request body, so I have no idea what is wrong.

// Uploads one more block via UploadBlock(), waits for every task in `tasks`,
// appends the not-yet-committed block IDs to committed_blocks_ids and commits
// that combined list to the blob.
// NOTE(review): this function iterates `tasks`, while UploadBlock() stores its
// futures in `m_tasks` — presumably the same container; confirm.
void TestClass::Flush()
{  
    UploadBlock();
    // get() blocks until the task has finished and rethrows anything it threw.
    for (auto& task : tasks) {
        if (task.valid()) {
            task.get();
        }
    }

    tasks.clear();

    // Extend the committed list with the IDs recorded since the last commit.
    std::copy(uncomitted_blocks_ids.begin(), uncomitted_blocks_ids.end(), 
    std::back_inserter(committed_blocks_ids));


    // The full list (earlier IDs plus the new ones) is sent on every commit.
    blob_client.AsBlockBlobClient().CommitBlockList(committed_blocks_ids);

    uncomitted_blocks_ids.clear();
}

// Runs a final Flush() and then empties both block-ID lists.
void TestClass::Close()
{     
    Flush();

    committed_blocks_ids.clear();
    uncomitted_blocks_ids.clear();
}
// Collects finished upload tasks from m_tasks, then launches a new asynchronous
// task that stages a copy of m_buffer as the next block.
void TestClass::UploadBlock()
{
    auto it= m_tasks.begin();
    try {
        for(; it != m_tasks.end(); /*++it*/) {
            // Collect a task when it is already finished, or unconditionally once
            // the number of stored tasks has reached m_network_connections_max
            // (get() then blocks until that task completes).
            if(it->wait_for(std::chrono::seconds(0)) == std::future_status::ready || 
                    m_tasks.size() >= m_network_connections_max){
                it->get();
                it= m_tasks.erase(it);
            }
            else {
                ++it;
            }
        }
    }
    catch(::Azure::Storage::StorageException&) {
        // NOTE(review): the loop walks m_tasks but the failed entry is erased from
        // `tasks` — confirm both names refer to the same container.
        tasks.erase(it);
        throw;
    }
    auto block_id = GetBlockId(std::to_string(block_count++));

    // The lambda captures copies of m_buffer and block_id, so the task works on its own data.
    m_tasks.push_back(std::async(std::launch::async, [this, block_data= m_buffer, block_id]() {
        auto block_content = ::Azure::Core::IO::MemoryBodyStream(block_data.data(), block_data.size());

        // NOTE(review): this push_back runs inside the async task and no lock is
        // visible here, so several tasks may modify uncomitted_blocks_ids at once and
        // the IDs are recorded in task-execution order rather than block order —
        // verify; recording the ID before launching the task would avoid both.
        uncomitted_blocks_ids.push_back(block_id);

        blob_client.AsBlockBlobClient().StageBlock(block_id, block_content);
    }));
}

Solution

  • 400 XML specified is not syntactically valid when using CommitBlockList

    The 400 XML specified is not syntactically valid error when using CommitBlockList generally indicates that the XML payload generated for committing blocks is invalid or malformed.

    You can refer to these two Microsoft documents, DOC-1 and DOC-2, for how to upload block blobs to Azure Blob Storage correctly.

    Here is sample code that uploads the file in blocks using correctly formed (Base64-encoded, equal-length) block IDs and records each ID before its block is staged.

    Code:

    #include <algorithm>
    #include <chrono>
    #include <cstdint>
    #include <exception>
    #include <fstream>
    #include <future>
    #include <iomanip>
    #include <iostream>
    #include <iterator>
    #include <sstream>
    #include <stdexcept>
    #include <string>
    #include <utility>
    #include <vector>
    
    #include <azure/storage/blobs.hpp>
    #include <openssl/evp.h>  // OpenSSL for Base64 encoding
    
    using namespace Azure::Storage::Blobs;
    
    // Number of bytes staged per block (4 * 1024 * 1024).
    constexpr int BlockSize = 4 * 1024 * 1024;
    // Default value for the limit on simultaneous uploads.
    constexpr int MaxConcurrency = 8;
    
    /// Splits a local file into blocks, stages them on a block blob through
    /// asynchronous tasks and commits the resulting block list.
    class TestClass {
    public:
        /// Keeps its own copy of the supplied client; the block counter starts at zero.
        TestClass(BlockBlobClient blobClient) : blob_client(blobClient) {}
    
        void UploadBlock();
        void Flush();
        void Close();
    
    private:
        std::string Base64Encode(const std::string& input);
        std::string GetBlockId(const std::string& blockNumber);
    
        // Client used for StageBlock / CommitBlockList calls.
        BlockBlobClient blob_client;
        // Futures of the staging tasks that have been launched.
        std::vector<std::future<void>> m_tasks;
        // IDs of blocks staged since the last commit.
        std::vector<std::string> uncomitted_blocks_ids;
        // IDs of blocks that have already been committed.
        std::vector<std::string> committed_blocks_ids;
        // Holds the file contents read by UploadBlock().
        std::vector<uint8_t> m_buffer;
        // Sequence number used to derive the next block ID.
        int block_count = 0;
        // Intended limit on parallel uploads, initialised from MaxConcurrency.
        size_t m_network_connections_max = MaxConcurrency;
    };
    
    /// Base64-encodes @p input using OpenSSL's EVP_EncodeBlock.
    /// @param input Raw bytes to encode (may be empty).
    /// @return The padded Base64 text, without a trailing NUL.
    std::string TestClass::Base64Encode(const std::string& input) {
        // Base64 produces 4 output characters for every (padded) group of 3 input bytes.
        const size_t encoded_size = 4 * ((input.size() + 2) / 3);
    
        // EVP_EncodeBlock also writes a terminating NUL after the encoded text,
        // so give it one extra byte instead of letting it write past the string's size.
        std::string encoded_string(encoded_size + 1, '\0');
    
        const int len = EVP_EncodeBlock(
            reinterpret_cast<unsigned char*>(&encoded_string[0]),
            reinterpret_cast<const unsigned char*>(input.data()),
            static_cast<int>(input.size()));  // the API takes an int length
    
        // Trim to the number of characters actually produced (drops the NUL slot).
        encoded_string.resize(len > 0 ? static_cast<size_t>(len) : 0);
        return encoded_string;
    }
    
    /// Builds a block ID from @p blockNumber: the text is left-padded with '0'
    /// to a width of 8 characters and the result is Base64-encoded.
    std::string TestClass::GetBlockId(const std::string& blockNumber) {
        std::ostringstream padded;
        padded << std::setfill('0') << std::setw(8) << blockNumber;
        const std::string raw_id = padded.str();
        return Base64Encode(raw_id);
    }
    
    void TestClass::UploadBlock() {
        // Read file content into m_buffer
        std::ifstream file("C:\\Users\\v-vsettu\\Downloads\\sample5.pdf", std::ios::binary | std::ios::ate);
        if (!file) {
            std::cerr << "Failed to open file." << std::endl;
            throw std::runtime_error("Failed to open file.");
        }
    
        std::streamsize file_size = file.tellg();
        file.seekg(0, std::ios::beg);
        m_buffer.resize(file_size);
        if (!file.read(reinterpret_cast<char*>(m_buffer.data()), file_size)) {
            std::cerr << "Failed to read file." << std::endl;
            throw std::runtime_error("Failed to read file.");
        }
    
        // Split the buffer into blocks and upload
        int offset = 0;
        while (offset < file_size) {
            int current_block_size = std::min(BlockSize, static_cast<int>(file_size) - offset);
            std::vector<uint8_t> block_data(m_buffer.begin() + offset, m_buffer.begin() + offset + current_block_size);
            auto block_id = GetBlockId(std::to_string(block_count++));
            uncomitted_blocks_ids.push_back(block_id);
            m_tasks.push_back(std::async(std::launch::async, [this, block_data = block_data, block_id]() {
                auto block_content = Azure::Core::IO::MemoryBodyStream(block_data.data(), block_data.size());
                blob_client.StageBlock(block_id, block_content);
                }));
    
            offset += current_block_size;
        }
    }
    
    /// Waits for all outstanding staging tasks and commits the blob's block list.
    ///
    /// The committed list is always the previously committed IDs followed by the
    /// newly staged ones. Committing only the new IDs would replace the blob's
    /// content with just the latest batch, or with nothing when no block was
    /// staged since the last commit (for example Close() right after Flush()).
    ///
    /// @throws whatever a staging task or CommitBlockList threw; in that case the
    ///         ID lists are left unchanged.
    void TestClass::Flush() {
        // get() blocks until the task is done and rethrows its exception, if any.
        for (auto& task : m_tasks) {
            if (task.valid()) {
                task.get();
            }
        }
        m_tasks.clear();
    
        // Assemble the complete block list: already committed blocks first, then the new ones.
        std::vector<std::string> block_list;
        block_list.reserve(committed_blocks_ids.size() + uncomitted_blocks_ids.size());
        block_list.insert(block_list.end(), committed_blocks_ids.begin(), committed_blocks_ids.end());
        block_list.insert(block_list.end(), uncomitted_blocks_ids.begin(), uncomitted_blocks_ids.end());
    
        blob_client.CommitBlockList(block_list);
    
        // Update the bookkeeping only after the service accepted the commit.
        committed_blocks_ids = std::move(block_list);
        uncomitted_blocks_ids.clear();
        std::cout << "Upload completed successfully." << std::endl;
    }
    
    /// Performs a final Flush() and then discards both block-ID lists.
    void TestClass::Close() {
        Flush();
    
        // The two lists are independent, so the order of clearing does not matter.
        uncomitted_blocks_ids.clear();
        committed_blocks_ids.clear();
    }
    
    /// Uploads the sample file to the configured blob.
    /// @return 0 on success, 1 if the upload failed with an exception.
    int main() {
        // Define your connection string and container/blob name.
        const std::string connection_string = "xxxxx";
        const std::string container_name = "data";
        const std::string blob_name = "test.pdf";
    
        try {
            // Create a BlockBlobClient using the connection string.
            BlobContainerClient container_client = BlobContainerClient::CreateFromConnectionString(connection_string, container_name);
            BlockBlobClient block_blob_client = container_client.GetBlockBlobClient(blob_name);
    
            // Create an instance of TestClass
            TestClass test_class(block_blob_client);
    
            // Perform the upload with parallelism.
            test_class.UploadBlock();
            test_class.Flush();
        }
        catch (const std::exception& e) {
            // Report the failure instead of letting the exception terminate the process.
            std::cerr << "Upload failed: " << e.what() << std::endl;
            return 1;
        }
    
        return 0;
    }
    

    Output:

    Upload completed successfully.
    

    enter image description here

    Portal: enter image description here