A Discrete-Event Network Simulator
API
hash-example.cc
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 as
5  * published by the Free Software Foundation;
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15  */
16 
17 #include <algorithm> // find
18 #include <climits> // CHAR_BIT
19 #include <fstream>
20 #include <iostream>
21 #include <iomanip>
22 #include <map>
23 #include <vector>
24 
25 #include "ns3/core-module.h"
26 #include "ns3/hash.h"
27 
105 namespace ns3
106 {
107 
108 NS_LOG_COMPONENT_DEFINE ("Hasher");
109 
110 namespace Hash
111 {
112 
117 namespace Example
118 {
119 
123 class Collider {
124 
125 public:
126  std::string m_name;
130  enum Bits {
133  };
134 
142  Collider (const std::string name, Hasher hash, const enum Bits bits)
143  : m_name (name),
144  m_hash (hash),
145  m_bits (bits)
146  { }
147 
154  bool Add (const std::string phrase)
155  {
156  uint64_t h = GetHash (phrase);
157 
158  // Check for collisions
159  if (m_dict.count (h) > 0)
160  {
161  // we've seen this hash before, why?
162  if (phrase == m_dict[h])
163  {
164  // duplicate phrase
165  return false;
166  }
167 
168  // new phrase generating a real collision
169  // alphabetize
170  if ( m_dict[h] < phrase)
171  {
172  m_coll.push_back (std::make_pair (h, phrase));
173  }
174  else
175  {
176  m_coll.push_back (std::make_pair (h, m_dict[h]));
177  m_dict[h] = phrase;
178  }
179  }
180  else
181  {
182  // Insert new hash
183  m_dict.insert (std::make_pair (h, phrase));
184  }
185  return true;
186  } // Add ()
187 
191  std::string GetName () const
192  {
193  std::string name = m_name;
194 
195  switch (m_bits)
196  {
197  case Bits32: name += " (32-bit version)"; break;
198  case Bits64: name += " (64-bit version)"; break;
199  default: name += " (unknown!?!)";
200  }
201  return name;
202  }
203 
205  void Report () const
206  {
207  std::cout << std::endl;
208 
209  std::cout << GetName () << ": " << m_coll.size () << " collisions:"
210  << std::endl;
211  for (collision_t::const_iterator it = m_coll.begin ();
212  it != m_coll.end ();
213  ++it)
214  {
215  uint64_t h = it->first;
216 
217  std::cout << std::setfill ('0') << std::hex << std::setw(8) << h
218  << std::dec << std::setfill(' ') << " "
219  << std::setw(20) << std::left
220  << m_dict.find (h)->second
221  << it->second
222  << std::right
223  << std::endl;
224  }
225  } // Report ()
226 
227 
228 private:
229 
236  uint64_t GetHash (const std::string phrase)
237  {
238  m_hash.clear ();
239  uint64_t h = 0;
240 
241  if (m_bits == Bits32)
242  {
243  h = m_hash.GetHash32 (phrase);
244  }
245  else
246  {
247  h = m_hash.GetHash64 (phrase);
248  }
249  return h;
250  }
251 
253  enum Bits m_bits;
254 
256  typedef std::map <uint64_t, std::string> hashdict_t;
257 
260 
262  typedef std::vector < std::pair<uint64_t, std::string> > collision_t;
263 
266 
267 }; // class Collider
268 
269 
273 class Dictionary {
274 public:
277  : m_nphrases (0)
278  {
279  m_words.reserve (320000);
280  }
281 
287  void Add (Collider c)
288  {
289  m_hashes.push_back (c);
290  }
291 
297  void Add (const std::string phrase)
298  {
299  if (phrase.size () == 0)
300  {
301  return ;
302  }
303 
304  int newPhrases = 0;
305  for (std::vector <Collider>::iterator it = m_hashes.begin ();
306  it != m_hashes.end ();
307  ++it)
308  {
309  newPhrases += it->Add (phrase);
310  }
311 
312  if (newPhrases)
313  {
314  ++m_nphrases;
315  m_words.push_back (phrase);
316  }
317  } // Add ()
318 
357  {
358  // Expected number of collisions
359  //
360  // Number of buckets = k = 2^bits
361  long double k32 = 0xFFFFFFFF;
362  long double k64 = static_cast<long double> (0xFFFFFFFFFFFFFFFFULL);
363 
364  long double n = m_nphrases;
365  long double Ec32 = n * (n - 1) / ( 2 * k32) * (1 - (n - 2)/(3 * k32));
366  long double Ec64 = n * (n - 1) / ( 2 * k64) * (1 - (n - 2)/(3 * k64));
367 
368  // Output collisions
369  std::cout << "" << std::endl;
370  std::cout << "Number of words or phrases: " << n << std::endl;
371  std::cout << "Expected number of collisions: (32-bit table) " << Ec32
372  << std::endl;
373  std::cout << "Expected number of collisions: (64-bit table) " << Ec64
374  << std::endl;
375  } // ReportExpectedCollisions
376 
377 
379  void Report () const
380  {
382 
383  for (std::vector <Collider>::const_iterator it = m_hashes.begin ();
384  it != m_hashes.end ();
385  ++it)
386  {
387  it->Report ();
388  }
389  } // Report ()
390 
396  void TimeOne (const int hindex)
397  {
398  // Hashing speed
399  uint32_t reps = 100;
400  Hasher h = m_hashes[hindex].m_hash;
401  int start = clock ();
402  for (std::vector<std::string>::const_iterator w = m_words.begin ();
403  w != m_words.end();
404  ++w)
405  {
406  for (uint32_t i = 0; i < reps; ++i)
407  {
408  h.clear ().GetHash32 (*w);
409  }
410  }
411  int stop = clock ();
412  double delta = stop - start;
413  double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
414 
415  std::cout << std::left
416  << std::setw (32) << m_hashes[hindex].GetName ()
417  << std::right
418  << std::setw (10) << m_nphrases
419  << std::setw (10) << reps
420  << std::setw (10) << stop - start
421  << std::setw (12) << per
422  << std::endl;
423 
424  } // TimeOne ()
425 
427  void Time ()
428  {
429  std::cout << "" << std::endl;
430  std::cout << std::left
431  << std::setw (32) << "Hash timing"
432  << std::right
433  << std::setw (10) << "Phrases"
434  << std::setw (10) << "Reps"
435  << std::setw (10) << "Ticks"
436  << std::setw (12) << "ns/hash"
437  << std::endl;
438 
439  for (unsigned int i = 0; i < m_hashes.size (); ++i)
440  {
441  TimeOne (i);
442  }
443  } // Time ()
444 
445 private:
446  unsigned long m_nphrases;
447  std::vector <Collider> m_hashes;
448  std::vector <std::string> m_words;
450 }; // class Dictionary
451 
452 
457 {
458 
459 public:
460 
467  bool Add (const std::string file)
468  {
469  if (std::find (m_files.begin (), m_files.end (), file) == m_files.end ())
470  {
471  m_files.push_back (file);
472  }
473 
474  return true;
475  }
476 
482  void ReadInto (Dictionary & dict)
483  {
484  if (m_files.size () == 0)
485  {
486  Add ("/usr/share/dict/web2");
487  }
488 
489  std::cout << "Hashing the dictionar"
490  << (m_files.size () == 1 ? "y" : "ies")
491  << std::endl;
492 
493  for (std::vector <std::string>::const_iterator it = m_files.begin ();
494  it != m_files.end ();
495  ++it)
496  {
497  std::string dictFile = *it;
498  std::cout << "Dictionary file: " << dictFile << std::endl;
499 
500  // Find collisions
501 
502  // Open the file
503  std::ifstream dictStream;
504  dictStream.open (dictFile.c_str () );
505  if (! dictStream.is_open () )
506  {
507  std::cerr << "Failed to open dictionary file."
508  << "'" << dictFile << "'"
509  << std::endl;
510  continue;
511  }
512 
513  while (dictStream.good () )
514  {
515  std::string phrase;
516  getline (dictStream, phrase);
517  dict.Add (phrase);
518  } // while
519 
520  dictStream.close ();
521 
522  } // for m_files
523 
524  } // ReadInto
525 
526 private:
527  std::vector <std::string> m_files;
529 }; // class DictFiles
530 
531 } // namespace Example
532 
533 } // namespace Hash
534 
535 } // namespace ns3
536 
537 
538 using namespace ns3;
539 using namespace ns3::Hash::Example;
540 
541 int
542 main (int argc, char *argv[])
543 {
544  std::cout << std::endl;
545  std::cout << "Hasher" << std::endl;
546 
547  bool timing = false;
548  DictFiles files;
549 
551  cmd.Usage ("Find hash collisions in the dictionary.");
552  cmd.AddValue ("dict", "Dictionary file to hash",
554  &files));
555  cmd.AddValue ("time", "Run timing test", timing);
556  cmd.Parse (argc, argv);
557 
558  Dictionary dict;
559  dict.Add ( Collider ("FNV1a",
560  Hasher ( Create<Hash::Function::Fnv1a> () ),
562  dict.Add ( Collider ("FNV1a",
563  Hasher ( Create<Hash::Function::Fnv1a> () ),
565 
566  dict.Add ( Collider ("Murmur3",
567  Hasher ( Create<Hash::Function::Murmur3> () ),
569  dict.Add ( Collider ("Murmur3",
570  Hasher ( Create<Hash::Function::Murmur3> () ),
572 
573  files.ReadInto (dict);
574 
575  dict.Report ();
576 
577  if (timing)
578  {
579  dict.Time ();
580  } // if (timing)
581 
582 
583 } // main
uint32_t GetHash32(const char *buffer, const std::size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:239
Use 32-bit hash function.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Add(const std::string phrase)
Add a string to the dictionary.
def start()
Definition: core.py:1844
void TimeOne(const int hindex)
Time and report the execution of one hash across the entire Dictionary.
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:202
void Time()
Report the execution time of each hash across the entire Dictionary.
cmd
Definition: second.py:35
bool Add(const std::string file)
CommandLine callback function to add a file argument to the list.
bool Add(const std::string phrase)
Add a string to the Collider.
unsigned long m_nphrases
Number of strings hashed.
Word list and hashers to test.
Collider(const std::string name, Hasher hash, const enum Bits bits)
Constructor.
Callback< R > MakeCallback(R(T::*memPtr)(void), OBJ objPtr)
Definition: callback.h:1489
void ReportExpectedCollisions() const
Report the expected number of collisions.
std::vector< std::string > m_words
List of unique words.
Parse command-line arguments.
Definition: command-line.h:213
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Keep track of collisions.
std::string m_name
Name of this hash.
Hasher & clear(void)
Restore initial state.
Definition: hash.cc:48
Every class exported by the ns3 library is enclosed in the ns3 namespace.
uint64_t GetHash64(const char *buffer, const std::size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:247
Source word list files.
Use 64-bit hash function.
void Report() const
Print the collisions for each Collider.
std::vector< std::string > m_files
List of word files to use.
enum Bits m_bits
Hash function.
void Report() const
Print the collisions found.
std::vector< Collider > m_hashes
List of hash Colliders.
Namespace for hasher-example.
void Add(Collider c)
Add a Collider containing a hash function.
collision_t m_coll
The list of collisions.
hashdict_t m_dict
The dictionary map, indexed by hash.
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
Generic Hash function interface.
Definition: hash.h:87
Bits
The size of hash function being tested.
std::string GetName() const
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.