Register Pressure
In [1]:
# Start from an empty scratch directory: remove any previous tmp/ tree,
# then recreate it (neither command complains if the directory is absent/present).
!rm -rf tmp
!mkdir -p tmp
In [21]:
%%writefile tmp/pressure.c
/*
 * Register-pressure demo.
 *
 * Declares thirty double accumulators and repeatedly updates a subset of them
 * inside a loop, each update depending on the sum of the whole active subset.
 * The size of the active subset (the "working set") is chosen by moving the
 * comment markers in the ALL macro and around the update statements below.
 *
 * Returns the sum of the active accumulators, converted to int. Presumably the
 * value is returned only so that the optimizer cannot discard the loop.
 */
int main()
{
int result = 0;
{
/* Thirty candidate accumulators with arbitrary constant initial values.
   With the comment markers as they stand, only x00..x09 are used below. */
double
x00 = 0,
x01 = 3,
x02 = 1,
x03 = 5,
x04 = 2,
x05 = 8,
x06 = 9,
x07 = 11,
x08 = 99,
x09 = 111,
x10 = 33+0,
x11 = 33+3,
x12 = 33+1,
x13 = 33+5,
x14 = 33+2,
x15 = 33+8,
x16 = 33+9,
x17 = 33+11,
x18 = 33+99,
x19 = 33+111,
x20 = 17+0,
x21 = 17+3,
x22 = 17+1,
x23 = 17+5,
x24 = 17+2,
x25 = 17+8,
x26 = 17+9,
x27 = 17+11,
x28 = 17+99,
x29 = 17+111;
/* NOTE(review): `a` is never referenced after this declaration. */
double a = 0;
/* One million iterations; `i` is converted to double in each product below. */
for (int i = 0; i< 1000*1000; ++i)
{
/* ALL expands to the parenthesized sum of the accumulators currently in play.
   The terms for x10..x29 sit inside a comment; the trailing backslashes join
   the definition, comment included, into a single logical line. */
#define ALL (\
x00 + x01 + x02 + x03 + x04 + x05 + x06 + x07 + x08 + x09 /*+ \
x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + \
x20 + x21 + x22 + x23 + x24 + x25 + x26 + x27 + x28 + x29 */\
)
/* ALL is re-expanded in every statement, so each update reads the values
   already modified by the statements above it. Every active accumulator is
   therefore both read and written on every iteration. */
x00 += i*ALL;
x01 += i*ALL;
x02 += i*ALL;
x03 += i*ALL;
x04 += i*ALL;
x05 += i*ALL;
x06 += i*ALL;
x07 += i*ALL;
x08 += i*ALL;
x09 += i*ALL;
/* Disabled updates for x10..x29. Enable them together with the matching
   terms in ALL to enlarge the working set. */
/*
x10 += i*ALL;
x11 += i*ALL;
x12 += i*ALL;
x13 += i*ALL;
x14 += i*ALL;
x15 += i*ALL;
x16 += i*ALL;
x17 += i*ALL;
x18 += i*ALL;
x19 += i*ALL;
x20 += i*ALL;
x21 += i*ALL;
x22 += i*ALL;
x23 += i*ALL;
x24 += i*ALL;
x25 += i*ALL;
x26 += i*ALL;
x27 += i*ALL;
x28 += i*ALL;
x29 += i*ALL;
*/
}
/* The macro is still defined here (macros are not block-scoped). The double
   sum is added to `result` and the total is converted to int.
   NOTE(review): the accumulators grow on every iteration; if the final sum is
   outside the range of int, this conversion is undefined behaviour in C.
   Presumably the numeric result is irrelevant to the demo -- confirm. */
result += ALL;
}
return result;
}
Overwriting tmp/pressure.c
In [22]:
# Compile to an object file only (-c, no linking) with basic optimization (-O),
# then disassemble it so the register usage of the loop can be inspected.
# `&&` instead of `;` ensures gcc runs only if changing into tmp/ succeeded.
!cd tmp && gcc -O -c pressure.c
!objdump --disassemble tmp/pressure.o
tmp/pressure.o: file format elf64-x86-64 Disassembly of section .text: 0000000000000000 <main>: 0: b8 00 00 00 00 mov $0x0,%eax 5: f2 0f 10 0d 00 00 00 movsd 0x0(%rip),%xmm1 # d <main+0xd> c: 00 d: f2 0f 10 15 00 00 00 movsd 0x0(%rip),%xmm2 # 15 <main+0x15> 14: 00 15: f2 0f 10 1d 00 00 00 movsd 0x0(%rip),%xmm3 # 1d <main+0x1d> 1c: 00 1d: f2 0f 10 2d 00 00 00 movsd 0x0(%rip),%xmm5 # 25 <main+0x25> 24: 00 25: f2 0f 10 35 00 00 00 movsd 0x0(%rip),%xmm6 # 2d <main+0x2d> 2c: 00 2d: f2 0f 10 3d 00 00 00 movsd 0x0(%rip),%xmm7 # 35 <main+0x35> 34: 00 35: f2 44 0f 10 05 00 00 movsd 0x0(%rip),%xmm8 # 3e <main+0x3e> 3c: 00 00 3e: f2 44 0f 10 0d 00 00 movsd 0x0(%rip),%xmm9 # 47 <main+0x47> 45: 00 00 47: f2 44 0f 10 1d 00 00 movsd 0x0(%rip),%xmm11 # 50 <main+0x50> 4e: 00 00 50: 66 45 0f ef d2 pxor %xmm10,%xmm10 55: 66 0f ef e4 pxor %xmm4,%xmm4 59: f2 0f 2a e0 cvtsi2sd %eax,%xmm4 5d: 66 41 0f 28 c2 movapd %xmm10,%xmm0 62: f2 41 0f 58 c3 addsd %xmm11,%xmm0 67: f2 41 0f 58 c1 addsd %xmm9,%xmm0 6c: f2 41 0f 58 c0 addsd %xmm8,%xmm0 71: f2 0f 58 c7 addsd %xmm7,%xmm0 75: f2 0f 58 c6 addsd %xmm6,%xmm0 79: f2 0f 58 c5 addsd %xmm5,%xmm0 7d: f2 0f 58 c3 addsd %xmm3,%xmm0 81: f2 0f 58 c2 addsd %xmm2,%xmm0 85: f2 0f 58 c1 addsd %xmm1,%xmm0 89: f2 0f 59 c4 mulsd %xmm4,%xmm0 8d: f2 44 0f 58 d0 addsd %xmm0,%xmm10 92: 66 41 0f 28 c2 movapd %xmm10,%xmm0 97: f2 41 0f 58 c3 addsd %xmm11,%xmm0 9c: f2 41 0f 58 c1 addsd %xmm9,%xmm0 a1: f2 41 0f 58 c0 addsd %xmm8,%xmm0 a6: f2 0f 58 c7 addsd %xmm7,%xmm0 aa: f2 0f 58 c6 addsd %xmm6,%xmm0 ae: f2 0f 58 c5 addsd %xmm5,%xmm0 b2: f2 0f 58 c3 addsd %xmm3,%xmm0 b6: f2 0f 58 c2 addsd %xmm2,%xmm0 ba: f2 0f 58 c1 addsd %xmm1,%xmm0 be: f2 0f 59 c4 mulsd %xmm4,%xmm0 c2: f2 44 0f 58 d8 addsd %xmm0,%xmm11 c7: 66 41 0f 28 c2 movapd %xmm10,%xmm0 cc: f2 41 0f 58 c3 addsd %xmm11,%xmm0 d1: 66 44 0f 28 e0 movapd %xmm0,%xmm12 d6: f2 45 0f 58 e1 addsd %xmm9,%xmm12 db: f2 45 0f 58 e0 addsd %xmm8,%xmm12 e0: f2 44 0f 58 e7 addsd %xmm7,%xmm12 e5: f2 44 0f 58 e6 addsd 
%xmm6,%xmm12 ea: f2 44 0f 58 e5 addsd %xmm5,%xmm12 ef: f2 44 0f 58 e3 addsd %xmm3,%xmm12 f4: f2 44 0f 58 e2 addsd %xmm2,%xmm12 f9: f2 44 0f 58 e1 addsd %xmm1,%xmm12 fe: f2 44 0f 59 e4 mulsd %xmm4,%xmm12 103: f2 45 0f 58 cc addsd %xmm12,%xmm9 108: f2 41 0f 58 c1 addsd %xmm9,%xmm0 10d: 66 44 0f 28 e0 movapd %xmm0,%xmm12 112: f2 45 0f 58 e0 addsd %xmm8,%xmm12 117: f2 44 0f 58 e7 addsd %xmm7,%xmm12 11c: f2 44 0f 58 e6 addsd %xmm6,%xmm12 121: f2 44 0f 58 e5 addsd %xmm5,%xmm12 126: f2 44 0f 58 e3 addsd %xmm3,%xmm12 12b: f2 44 0f 58 e2 addsd %xmm2,%xmm12 130: f2 44 0f 58 e1 addsd %xmm1,%xmm12 135: f2 44 0f 59 e4 mulsd %xmm4,%xmm12 13a: f2 45 0f 58 c4 addsd %xmm12,%xmm8 13f: f2 41 0f 58 c0 addsd %xmm8,%xmm0 144: 66 44 0f 28 e0 movapd %xmm0,%xmm12 149: f2 44 0f 58 e7 addsd %xmm7,%xmm12 14e: f2 44 0f 58 e6 addsd %xmm6,%xmm12 153: f2 44 0f 58 e5 addsd %xmm5,%xmm12 158: f2 44 0f 58 e3 addsd %xmm3,%xmm12 15d: f2 44 0f 58 e2 addsd %xmm2,%xmm12 162: f2 44 0f 58 e1 addsd %xmm1,%xmm12 167: f2 44 0f 59 e4 mulsd %xmm4,%xmm12 16c: f2 41 0f 58 fc addsd %xmm12,%xmm7 171: f2 0f 58 c7 addsd %xmm7,%xmm0 175: 66 44 0f 28 e0 movapd %xmm0,%xmm12 17a: f2 44 0f 58 e6 addsd %xmm6,%xmm12 17f: f2 44 0f 58 e5 addsd %xmm5,%xmm12 184: f2 44 0f 58 e3 addsd %xmm3,%xmm12 189: f2 44 0f 58 e2 addsd %xmm2,%xmm12 18e: f2 44 0f 58 e1 addsd %xmm1,%xmm12 193: f2 44 0f 59 e4 mulsd %xmm4,%xmm12 198: f2 41 0f 58 f4 addsd %xmm12,%xmm6 19d: f2 0f 58 c6 addsd %xmm6,%xmm0 1a1: 66 44 0f 28 e0 movapd %xmm0,%xmm12 1a6: f2 44 0f 58 e5 addsd %xmm5,%xmm12 1ab: f2 44 0f 58 e3 addsd %xmm3,%xmm12 1b0: f2 44 0f 58 e2 addsd %xmm2,%xmm12 1b5: f2 44 0f 58 e1 addsd %xmm1,%xmm12 1ba: f2 44 0f 59 e4 mulsd %xmm4,%xmm12 1bf: f2 41 0f 58 ec addsd %xmm12,%xmm5 1c4: f2 0f 58 c5 addsd %xmm5,%xmm0 1c8: 66 44 0f 28 e0 movapd %xmm0,%xmm12 1cd: f2 44 0f 58 e3 addsd %xmm3,%xmm12 1d2: f2 44 0f 58 e2 addsd %xmm2,%xmm12 1d7: f2 44 0f 58 e1 addsd %xmm1,%xmm12 1dc: f2 44 0f 59 e4 mulsd %xmm4,%xmm12 1e1: f2 41 0f 58 dc addsd %xmm12,%xmm3 1e6: f2 0f 
58 c3 addsd %xmm3,%xmm0 1ea: 66 44 0f 28 e0 movapd %xmm0,%xmm12 1ef: f2 44 0f 58 e2 addsd %xmm2,%xmm12 1f4: f2 44 0f 58 e1 addsd %xmm1,%xmm12 1f9: f2 44 0f 59 e4 mulsd %xmm4,%xmm12 1fe: f2 41 0f 58 d4 addsd %xmm12,%xmm2 203: f2 0f 58 c2 addsd %xmm2,%xmm0 207: 66 44 0f 28 e0 movapd %xmm0,%xmm12 20c: f2 44 0f 58 e1 addsd %xmm1,%xmm12 211: f2 41 0f 59 e4 mulsd %xmm12,%xmm4 216: f2 0f 58 cc addsd %xmm4,%xmm1 21a: 83 c0 01 add $0x1,%eax 21d: 3d 40 42 0f 00 cmp $0xf4240,%eax 222: 0f 85 2d fe ff ff jne 55 <main+0x55> 228: f2 0f 58 c1 addsd %xmm1,%xmm0 22c: f2 0f 58 05 00 00 00 addsd 0x0(%rip),%xmm0 # 234 <main+0x234> 233: 00 234: f2 0f 2c c0 cvttsd2si %xmm0,%eax 238: c3 retq
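- Grow the working set by moving the comment markers (both in the `ALL` macro and around the disabled `x10`–`x29` updates), recompile, and look for spills — accumulators stored to and reloaded from the stack — in the assembly listing.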
- At what size of the working set do you expect to see no spills?