Search code examples
c++ stdmap bad-alloc const-iterator

std::map::begin() returns an iterator with garbage


// Scalar types used for term statistics.
using Count = unsigned long;
using Weight = float;

// term => count / term => weight
using StringToCountMap = std::map<std::string, Count>;
using StringToWeightMap = std::map<std::string, Weight>;

// numeric id => per-term counts / per-term weights
using UnsignedToStringToCountMap = std::map<unsigned long, StringToCountMap>;
using UnsignedToStringToWeightMap = std::map<unsigned long, StringToWeightMap>;

// numeric id => index of the cluster it is assigned to
using ClustersMap = std::map<unsigned long, std::size_t>;


/// Clustering algorithm operating on weighted term vectors.
///
/// An instance is built from a cluster count and a map of document vectors;
/// DoClustering() is the only public operation and returns a ClustersMap.
/// The document vectors are held by reference, not copied, so the map passed
/// to the constructor must outlive this object.
class DefaultClusteringAlgorithm
{
public:
    /// Minimum number of documents changing clusters for the algorithm to end.
    static const unsigned DocumentChangeThreshold = 0;

    /// @param numClusters      number of clusters, stored in numClusters_
    /// @param documentVectors  id => term-weight vector; bound by reference
    DefaultClusteringAlgorithm(unsigned numClusters,
                               const UnsignedToStringToWeightMap &documentVectors)
        : numClusters_(numClusters), documentVectors_(documentVectors)
    {
    }

    ~DefaultClusteringAlgorithm() {}

    /// Runs the clustering and returns the resulting id => cluster id mapping.
    /// (Defined outside this class body.)
    const ClustersMap &DoClustering();

private:
    // Steps of the algorithm; all are defined outside this class body.
    void ChooseInitialCentroids();
    unsigned ClusterOnCentroids();
    void RecalculateCentroids();

    // Vector helpers for term-weight maps.
    float DocumentDotProduct(const StringToWeightMap &left, const StringToWeightMap &right);
    float DocumentLength(const StringToWeightMap &document);

    /// Cluster count supplied at construction.
    unsigned numClusters_;

    /// Centroid per cluster; the vector index is the cluster id.
    std::vector<StringToWeightMap> centroids_;

    /// Maps question id => cluster id.
    ClustersMap clusters_;

    /// Document vectors, keyed by id. Non-owning reference to the caller's map.
    const UnsignedToStringToWeightMap &documentVectors_;
};

void DefaultClusteringAlgorithm::RecalculateCentroids()
{
    std::vector<unsigned> newCentroidsSizes(centroids_.size());
    std::vector<StringToWeightMap> newCentroids(centroids_.size());

    ClustersMap::const_iterator clusterMapping = clusters_.begin();

    for (; clusterMapping != clusters_.end(); ++clusterMapping)
    {
        std::size_t clusterId = clusterMapping->second;

        ++newCentroidsSizes[clusterId];
        const StringToWeightMap &document = documentVectors_.at(clusterMapping->first);

        StringToWeightMap::const_iterator termWeight = document.cbegin();

        for (; termWeight != document.end(); ++termWeight);
        {
            newCentroids[clusterId][termWeight->first] += termWeight->second;
        }
    }

    std::vector<unsigned>::iterator centroidSize = newCentroidsSizes.begin();

    for (; centroidSize != newCentroidsSizes.end(); ++centroidSize)
    {
        std::size_t clusterId = centroidSize - newCentroidsSizes.begin();

        StringToWeightMap::iterator centroidTermWeight = newCentroids[clusterId].begin();

        for (; centroidTermWeight != newCentroids[clusterId].end(); ++centroidTermWeight)
        {
            centroidTermWeight->second /= *centroidSize;
        }
    }
}

debugger watch

The problem occurs in creating the const_iterator termWeight:

StringToWeightMap::const_iterator termWeight = document.begin();

As you can see in the image above, the termWeight const_iterator holds invalid data. However, the const std::map document it was obtained from is a perfectly valid std::map. I cannot think of any reason why this is happening.

I recently learned that std::map::cbegin() exists. Should I be using that method instead?

EDIT: Included more context


Solution

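  • Hah! I found the error! A silly little semicolon at the end of my inner for loop (`for (; termWeight != document.end(); ++termWeight);`). It gave the loop an empty body, so the braced block after it ran exactly once with termWeight already equal to document.end() - which is why the iterator looked like garbage in the debugger.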