// STACK
// a data LIFO structure (last in first out)
// each fxn gets a memory space on the stack to record its own info
// this is called a "stack frame", and is just big enough to hold what
// fxn needs:
// 1. space for all automatic variables
// 2. return addr (assembly) in caller who called this fxn
// 3. space for return value, if any
// 4. other things arch/cpu/os may need

// Recall there's a PC (program counter) register in the CPU that always
// points to the next (or current instruction) being executed.

// Recall STACK mem often starts at high addresses and "grows" towards lower
// address numbers (e.g., opposite of the HEAP).

// Tracking the top of the stack is done using a CPU register called Stack
// Pointer (SP).  Only mem "below" SP is considered valid (higher address
// values, if stack grows towards lower numbers).  Any mem "above" the SP is
// considered invalid, or free (namely, lower address numbers if stack grows
// towards lower numbers).

// A function should never access mem addrs in the invalid portion of the
// stack (e.g., above SP).  Also a fxn should ONLY access memory in its own
// stack frame, not outside in other stack frames: a stack frame is like a
// "private" memory area for the scope of that function alone; accessing any
// mem of another stack frame is like accessing someone else's memory.

// size of stack frame of every fxn is decided at compile time
// compiler also decides on locations of all auto vars relative to the SP
// location, which is set at RUN TIME to the top of that function's stack
// frame.

//////////////////////////////////////////////////////////////////////////////
// See lecture spreadsheet stack.xlsx, tab "stack1"

int foo(int x) // x is also an auto var ("formal param")
{ // x is in M[SP+12]
  int y; // automatic variable: mem space alloc/dealloc on "stack"
  y = x * 2; // y is assigned to addr M[SP+8]
  printf("y=%d", y); // print 6
  printf("y addr=%d", &y); // print 292, or M[SP+8]
  return y; // copy content of Y, or M[SP+8] into "retval" or M[SP]

  // when returning, you call "goto M[SP+4]", back to TEXT segment where
  // "main" is, right at the assignment back from foo().  In other words,
  // you assign the value in M[SP+4] to the PC register.

  // when you return from fxn foo, also need to remember to adjust SP back
  // to the top of the "main" stack frame.  IOW, we "pop" or discard the top
  // of the stack, which is foo's stack frame.  Or, SP=SP+12
}

main() // SP points at addr 300 when "main" is top of stack
{
  int i = 3; // address 300..303, or &i, M[SP]
  int j; // address 304..307, or &i, M[SP+4]

  printf("i=%d", i); // print 3
  j = foo(i); // after returning back from foo(), SP was adjusted back, need
	      // to copy contents of foo:retval into main:j.
	      // main:j or M[SP+4] = foo:retval or M[SP-16]
  printf("j=%d", j);
}

//////////////////////////////////////////////////////////////////////////////
// See lecture spreadsheet stack.xlsx, tab "stack2"

// Passing to functions in C: pass by value (only)
// Even when you pass a pointer to a variable, all you do is pass the addr
// of that variable in memory.
void foo2(int x) // 'x' lives in M[SP+4]
{
  x = x * 2; // this 'x' is private to foo2
  printf("x=%d", x); // prints 6
  return; // goto M[SP]
}

main() // SP points at addr 300 when "main" is top of stack
{
  int i = 3; // address 300..303, or &i, M[SP]
  foo2(i); // foo2 modifies passed value (x) but doesn't modify 'i'
  printf("i=%d", i); // prints 3, not 6
  // Note: when stack frames are popped, the memory of those frames is not
  // changed at all!
}

//////////////////////////////////////////////////////////////////////////////
// base 256: every int from 0 to 2^32 is represented by 4 bytes:
// B1 + B2 * 256 + B3 * 256^2 + B3 * 256^3
// B1 * 256^0 + B2 * 256^1 + B3 * 256^2 + B3 * 256^3
// The order of B1..B4 depends on the computer's endianness.  So the bytes
// could be stored in memory from most-significant to least significant
// byte, or vice verse.

// See lecture spreadsheet stack.xlsx, tab "stack3"
// In excel examples: B1..B4 are Val4..Val1 columns.

// the number 300 is stored in 4 bytes of 'x', in "base 256"
// or 256 + 44: 1 * 256 + 44
int foo3(int *x)
{
  printf("%d %d %d", x, &x, *x); // prints 300 296 3
  // note that x itself has an addr, SP+8, content of x is in M[SP+8]
  *x = *x * 2; // this WILL change the content of the &ptr passed
               // first, *x is retrieved, or 3.  then multiply by 2, get 6.
               // then '6' is stored in M[x] or M[300], or in "SP notation:
  // IOW, M[M[SP+8]] = M[M[SP+8]] * 2
  // meaning, that foo3 is changing a mem addr in stack frame of main!
  return *x; // return '6' or M[300]
}

main2()
{ int i = 3, j = 0;

  printf("before: %d %d", i, j); // 3 0
  j = foo3(&i); // &i is SP, or the *number* 300. main:i already changed
		// before foo3 even returns.  It was changed the minute the
		// "*x = ..." assignment in foo3 executed.
                // j=6 is same as M[SP+4] = M[SP-12]
  printf("after: %d %d", i, j); // 6 6 (note main:i DID change!)
}

//////////////////////////////////////////////////////////////////////////////
// See lecture spreadsheet stack.xlsx, tab "stack4"

char *foo4(void)
{
  char *p = malloc(12); // p is an auto variable, but malloc mem is "global"
			// (is not allocated in the stack segment but the
			// heap).  "global" means that ANY piece of code
			// that has access to that malloc'd addr, can access
			// and modify that mem buffer that malloc returned.
  // assume malloc returned address 10,000=39*256+16
  // malloc'd mem doesn't have 'scope' like auto vars on the stack.
  return p;
}

main()
{
  int i;
  char *str = foo4(); // str ptr is in M[304..307], points to addr 10000
  // some code, eg. strcpy(str, "something");
  free(str);
}

//////////////////////////////////////////////////////////////////////////////
// See lecture spreadsheet stack.xlsx, tab "stack5"

// expected call from main: char *str = foo5()
// BUG: stack frame mem of foo5 leaked to parent!
// don't leak addrs of auto stack vars to parent.
// Once we return to main, the stack frame of foo5 is no longer valid (it's
// been "automatically freed" by moving SP back to main.  BUT, we "leaked"
// the addr of something inside foo5's stack frame into main's memory.
// The moment main calls another fxn, the mem space used by foo5's stack
// frame will be overridden with NEW values, and yet "str" still points to
// that addr.
char *foo5(void)
{
  char p[12]; // p is an auto variable, starting at SP+8 (addr 288, for 12
	      // bytes).  You can declare buffers of almost any size on your
	      // stack, and they automatically show up in memory, no need to
	      // explicitly allocate or free them
  strcpy(p, "abcxyz"); // copy bytes starting at M[SP+8]
  return &p; // note: not returning content of p, but the number "288" (or
	     // 1*256+32)
}

main()
{
  int i;
  char *str = foo5(); // str ptr is in M[304..307], but its content after
		      // returning will contain the number '288'!
  printf("%s\n", str); // prints "abcxyz"
  strcpy(str, "hello world"); // copy new str starting addr 288
  printf("%s\n", str); // prints "hello world"
  // some code
  foo4(); // or any other fxn -> stack mem of 'str' overwritten!
  printf("%s\n", str); // can print anything, depending on what's in mem NOW
  free(str);
}






//////////////////////////////////////////////////////////////////////
// See lecture spreadsheet stack.xlsx, tabs "stack6a" and "stack6b"
char *getusername(void) // stack smashing, hijacking a program
{
  char username[8]; // starts at addr 284 or M[SP]
  gets(username); // read from stdin, fill in bytes into username, until see
		  // a null in input.  Note that stdin can be redirected to
		  // read from any other FD, including network sockets.
  // problem: gets() doesn't stop at 8 bytes, can continue to read
  // and overwrite stack frame of this fnx, overwriting ret code, ret val,
  // and even the stack frames below.  A buffer or stack overflow bug.
  return username; // note: on stack, but if called memcpy's it right away,
		   // may be ok.
  // "return" will "goto M[SP+12]"
  // result: hijack program, run user's code starting at addr 300
  // a stack overflow/smashing bug. many such bugs still exist
  // solution: don't use any fxn with unbound access to mem.
  // e.g., use fgets(3) instead of gets.  But note that programmers can make
  // mistakes by SAYING that the buf len is different than what it is.
}

// Sadly, stack/buf overflow are still fairly prevalent.  Lots of legacy
// code and APIs that are hard to get rid of.  Ideally don't use any fxn
// that reads or writes any buffer w/o also specifying the max length or
// amount of data to read/write for that buffer.

// Solutions: better coding tools, better education to programmers

// OS techniques to "randomize" the start/end of the stack and even where
// stack frames are located.  Called ASLR (Address space layout
// randomization), but can slow down programs.

// How come the STACK segment is even executable?! If the STACK segment has
// only "R" and "W" permissions and not "X" then we shouldn't be able to
// execute code there...  Legacy intel 32 bit CPUs (most popular) didn't
// even have an 'X' bit, so OSs had to have a "noop" flag for the X bit, and
// used it only on processors that DID have that 'x' bit (e.g., SPARC,
// MIPS).

// Alas, there's still lots of bugs that allow a hacker, once they're able
// to cause an overflow, to store malicious data in the STACK, HEAP, or any
// other mmap'd segment.  If hackers can execute their own code, they can
// effectively take over the system entirely.
