//////////////////////////////////////////////////////////////////////////////
// CODE

// issues with malloc'd mem: initializations
/*
 * main1 - annotated walk-through of common malloc/free pitfalls:
 * uninitialized pointers, uninitialized heap memory, endianness,
 * use-after-free, pointer aliasing, poisoning, and guard zones (redzones).
 *
 * Statements marked "DELIBERATE BUG" invoke undefined behavior on purpose;
 * they are the point of the lesson and are kept as-is.  Everything else is
 * meant to be valid C.  The addresses used in the comments (30000 for the
 * variable 's', 4000 for the malloc'd buffer) are made-up example values,
 * and the 4-byte pointer layout assumes a 32-bit machine.
 *
 * Returns 0 on completion, 1 if an allocation fails.
 */
int main1(void) {
  char *s; // uninitialized pointer: has some fixed lvalue, eg, 30,000 up
           // to, say 30,003.  Recall that a "char*" is commonly used to
           // refer to an "array of characters" (it points at the first
           // one).  A string is an array of chars that ends with a null
           // byte (\0).
  char *q; // will be used later as an alias of 's'

  // note: pointers are printed with "%p" and a (void *) cast; passing a
  // pointer to "%d" is a format/argument mismatch and truncates on 64-bit.
  printf("%p\n", (void *)&s); // print lvalue (or location in mem) where 's'
                              // is stored, will print 30,000
  printf("%p\n", (void *)s); // DELIBERATE BUG: &s[0], will print pointer
                             // value (garbage, 's' is uninitialized)
  printf("%d\n", s[0]); // DELIBERATE BUG: print first cell, or M[M[30000]]
  // note: s is a "char*" but "s[0]" is just a char (i.e., remove one '*')
  // same is true for s[1], s[2], etc.  Another syntax for this is "*s" (or
  // to "dereference 's'").  And "*s" is a shortcut for "*(s+0)".  So, s[5]
  // is the same as *(s+5).  Recall you can perform arithmetic on pointers
  // in C, ie. add/subtract pointer values as if they're just numbers.

  s[1] = 17; // DELIBERATE BUG: store '17' as the 2nd byte in 's', or
             // M[M[30000]+1] <- 17.
  // above code can succeed if you try to read/write to a valid mem addr in
  // your virt add space, else SEGV.

  s = malloc(10); // assume malloc returned addr 4000, meaning malloc gives
                  // you the buffer in memory from 4000..4009.
  // same as M[30,000] <- 4000
  if (s == NULL)
    return 1; // malloc can fail; never write through a NULL result
  printf("%p\n", (void *)s); // &s[0], will print pointer value -- 4000
  s[1] = 17; // go to addr 30000 (where s is always located in this fxn)
             // read that, get 4000; go to addr 4000
             // (start of array s), add 1, get 4001, then store the number
             // 17 into addr 4001.
             // M[ M[30000] + 1] <- 17 or ..
             // M[4001] <- 17

  printf("%d\n", s[0]); // DELIBERATE BUG: print first element of s (string
                        // == array of char).  Show content of cell
                        // M[M[30000]] or M[4000].  This can print ANYTHING
                        // b/c you didn't init the mem you got back from
                        // malloc.

  // Note: when we say above that we assigned '4000' to address 30000, we
  // really assigned the number 4000 for the 4 bytes starting at 30000.
  // Why?  A single byte is 8 bits, and can hold values from 0..255.  To
  // store larger numbers, you have to store them into 4 bytes in "base
  // 256".  4000 is 15*256 + 160, or 3840+160.  So you store in the 4 bytes
  // starting at 30000, the following 4 numbers
  // M[30000] <- 160 (called the LSB -- least significant byte)
  // M[30001] <- 15
  // M[30002] <- 0
  // M[30003] <- 0 (called the MSB -- most significant byte)
  //
  // so the full value of the 32-bit number in those 4 bytes is calculated
  // as M[30000] + M[30001]*256 + M[30002]*256*256 + M[30003]*256*256*256 or
  // as M[30000]*256^0 + M[30001]*256^1 + M[30002]*256^2 + M[30003]*256^3 or
  // as 160*256^0 + 15*256^1 + 0*256^2 + 0*256^3
  //
  // Question: sometimes, the architecture likes to store the above 4 bytes
  // in the opposite order:
  // M[30000] <- 0 (MSB)
  // M[30001] <- 0
  // M[30002] <- 15
  // M[30003] <- 160 (LSB)
  //
  // the difference between these two styles is called the endian-ness,
  // whether the system is "big endian" or "little endian".  There's a whole
  // class of "endianness" bugs where data is stored or transmitted between
  // entities of different endianness, and the values are transposed.  The
  // number 4000 as seen above, if mixed to the "other" endianness
  // incorrectly, will become
  // as 0*256^0 + 0*256^1 + 15*256^2 + 160*256^3 or
  // 2,685,337,600


  // above problem: array of 10 bytes from malloc is not initialized,
  // M[4000]..M[4009].  You can init using calloc(3), or "zalloc", or
  memset(s, 0, 10); // M[4000] <- 0; ... ; M[4009] <- 0.

  // lets put something valid
  strcpy(s, "hi there"); // M[4000] <- 'h' or 104 (see man ascii)
  // etc for the remaining chars including M[4008] <- 0 (null terminating
  // the string; 8 chars + 1 null byte = 9 bytes, fits in the 10 we have)
  printf("%s", s); // will print "hi there", one char at a time, starting at
                   // address 4000, until it hits a 0 byte (infinite loop!)
                   // Will only stop when sees a 0 or hit an unmapped mem
                   // region (you get a SEGV).  IOW, this printf can easily
                   // run off the end of the buffer (buf overflow).

  // now free the ptr s
  free(s);
  // malloc will only mark the entry for addr 4000 (len 10) as "free".
  // malloc will NOT change the value of &s, s itself, or anything in mem
  // addresses 4000..4009.

  // assume nothing changed in mem here

  // next: do a "use-after-free" bug
  printf("%p\n", (void *)&s); // print addr of s: 30,000 (recall this never
                              // changes)
  printf("%p\n", (void *)s); // DELIBERATE BUG: print content of s variable
                             // memory: 4000 (why, b/c free doesn't change
                             // the addr assigned to 's')
  printf("%s", s); // DELIBERATE BUG: will it print "hi there"? yes, b/c
                   // free doesn't change content POINTED to by 's' either.

  // now, assume time has passed, your code (possible multi threaded) has
  // done a lot of additional malloc's and free's....
  printf("%p\n", (void *)&s); // print addr of s: 30,000
  printf("%p\n", (void *)s); // DELIBERATE BUG: can be any value (content of
                             // mem at 30000)
  printf("%s", s); // DELIBERATE BUG: can be anything from anywhere in
                   // memory.

  // The variants below each need a live buffer to operate on; 's' is
  // dangling at this point, so get a fresh allocation first (re-using the
  // freed pointer would be a write-after-free followed by a double free).
  s = malloc(10); // assume we get addr 4000 again
  if (s == NULL)
    return 1;

  // suppose that 'free()' were to zero out the variable and its values? (C
  // doesn't).  You'll still have a problem due to pointer aliasing:
  q = s; // an alias for 's' created before free(s), so now 'q' holds the
         // value 4000 even after free(s) succeeded.

  // what about content of mem at 4000?  use memset to zero it out.
  // additional steps below are useful for debugging (other than free)
  // 1. first memset mem
  memset(s, 0, 10); // anything trying to read this mem location will get 0s
  // 2. then free
  free(s);
  // 3. then null out variable
  s = NULL; // if your code uses it, will result in SEGV
  (void)q;  // 'q' still holds 4000: nulling 's' did nothing for the alias

  // but a value of 0 can be a valid value, esp. for initialized variables
  // and buffers, so how can we tell the diff?  Instead, "poison" the memory
  // with special non-zero values that would identify the nature of the bug.

  s = malloc(10); // fresh buffer for the poisoning variant ('s' was NULL)
  if (s == NULL)
    return 1;
  // 1. poison the buffer with a known value
  memset(s, 0x5a, 10); // '5' is 0101b, and 'a' (10 in decimal) is 1010b
  // 2. then free
  free(s);
  // 3. then write another special addr value
  s = (char *)0xdeadbeefUL; // or 0xdeadc0de (integer -> pointer needs a cast)
  // you may also protect the mem page with addr 0xdeadbeef with PROT_NONE
  // so if you get a SEGV with 0xdeadbeef, you know it was a use-after-free
  // ptr.  That's different  than if you got a NULL ptr deref (that's more
  // likely an indication of using an uninitialized ptr).
  // And if you get any buf contents with hex values like 0x5a5a5a5a,
  // you know it was a malloc'd buffer that was freed and used after
  // (poisoned).

  // other tricks:
  // 1. initialize every unassigned pointer with a specific hex value, eg
  // 0xdeadc0de. e.g.,
  void *p; // compiler may assign, in debug mode, the value 0xdeadc0de, so
           // if your prog crashes w/ this value, you know that it was an
           // uninitialized variable.
  int a[100]; // auto var
  (void)a;    // only here to illustrate an uninitialized auto buffer
  p = malloc(100); // malloc'd buffer
  // some debug libraries/compilers, will write pattern 0x6b to each byte of
  // an uninitialized variable, so if you ever see a pattern 0x6b6b6b6b, you
  // know it was an uninitialized buffer (not ptr)

  // what about buffer overflows or underflows?
  // 1. suppose I want a buffer of 10 bytes -- malloc(10)
  size_t len = 4 + 10 + 4; // need 18 bytes
  unsigned char *guarded = malloc(len); // byte pointer: a "void *" cannot
                                        // be indexed
  if (guarded == NULL) {
    free(p);
    return 1;
  }
  memset(&guarded[0], 0xaa, 4); // called a guard, guardian, redzone
  memset(&guarded[4], 0xbb, 10);
  memset(&guarded[14], 0xcc, 4);
  // return &guarded[4] to the caller of the allocation, not &guarded[0]
  // so if you see different patterns, you know you've had a buf underflow
  // (0xaa's), overflow (0xcc's), or using uninit'd bytes (0xbb's).

  // release what is still owned here (remember to free from &guarded[0],
  // the address malloc actually returned, not from &guarded[4])
  free(guarded);
  free(p);
  return 0;
}
