diff --git a/f3dex3.s b/f3dex3.s
index f58a961..c986e61 100644
--- a/f3dex3.s
+++ b/f3dex3.s
@@ -773,8 +773,8 @@ vZero equ $v0  // all elements = 0
 //                 $zero // Hardwired zero scalar register
 perfCounterD   equ $12   // Performance counter D (functions depend on config)
 altBaseReg     equ $13   // Alternate base address register for vector loads
-inputVtxPos    equ $14   // Pointer to loaded vertex to transform
-outputVtxPos   equ $15   // Pointer to vertex buffer to store transformed verts
+inVtx    equ $14   // Pointer to loaded vertex to transform
+outVtxBase     equ $15   // Pointer to vertex buffer to store transformed verts
 clipFlags      equ $16   // Current clipping flags being checked
 clipPolyRead   equ $17   // Read pointer within current polygon being clipped
 clipPolySelect equ $18   // Clip poly double buffer selection, or < 0 for normal tri write
@@ -792,8 +792,9 @@ perfCounterC   equ $30   // Performance counter C (functions depend on config)
 
 // Misc scalar regs:
 clipMaskIdx  equ $6
-secondVtxPos equ $8
+outVtx2      equ $8
 curLight     equ $9
+outVtx1      equ $19
 
 // Arguments to dma_read_write
 dmaLen   equ $19 // also used by itself
@@ -815,18 +816,18 @@ postOvlRA equ $10 // Commonly used locally
 // $6: clipMaskIdx, geometry mode low byte during tri write, local
 // $7: command byte when command handler is called, mIT recompute flag in
 //     Overlay 4, local
-// $8: secondVtxPos, local
+// $8: outVtx2, local
 // $9: curLight, clip mask during clipping, local
 // $10: postOvlRA, common local
 // $11: very common local
 // $12: perfCounterD (global). This must be $12 for S2DEX compat in while_wait_dma_busy.
 // $13: altBaseReg (global)
-// $14: inputVtxPos, local
-// $15: outputVtxPos, local
+// $14: inVtx, local
+// $15: outVtxBase, local
 // $16: clipFlags (global)
 // $17: clipPolyRead (global)
 // $18: clipPolySelect (global)
-// $19: dmaLen, onscreen vertex during clipping, local
+// $19: dmaLen, outVtx1, local
 // $20: dmemAddr, local
 // $21: clipPolyWrite (global)
 // $22: rdpCmdBufEndP1 (global)
@@ -1869,14 +1870,14 @@ clip_edgelooptop: // Loop over edges connecting verts, possibly subdivide the ed
     beq     $11, clipFlags, clip_nextedge  // Both set or both clear = both off screen or both on screen, no subdivision
      move   clipFlags, $11                     // clipFlags = masked V2's flags
     // Going to subdivide this edge. Find available temp vertex slot.
-    li      outputVtxPos, clipTempVertsEnd
+    li      outVtxBase, clipTempVertsEnd
 clip_find_unused_loop:
-    lhu     $11, (VTX_CLIP - vtxSize)(outputVtxPos)
-    addi    $10, outputVtxPos, -clipTempVerts  // This is within the loop rather than before b/c delay after lhu
+    lhu     $11, (VTX_CLIP - vtxSize)(outVtxBase)
+    addi    $10, outVtxBase, -clipTempVerts  // This is within the loop rather than before b/c delay after lhu
     blez    $10, clip_done                 // If can't find one (should never happen), give up
      andi   $11, $11, CLIP_VTX_USED
     bnez    $11, clip_find_unused_loop
-     addi   outputVtxPos, outputVtxPos, -vtxSize
+     addi   outVtxBase, outVtxBase, -vtxSize
     beqz    clipFlags, clip_skipswap23     // V2 flag is clear / on screen, therefore V3 is set / off screen
      move   $19, $2                            // 
     move    $19, $3                            // Otherwise swap V2 and V3; note we are overwriting $3 but not $2
@@ -1936,7 +1937,7 @@ clip_skipxy:
     vabs    $v29, $v29, $v31[3]           // 2; v29 = +/- 2 based on sum positive (incl. zero) or negative
     lhu     $5, geometryModeLabel + 1     // Load middle 2 bytes of geom mode, incl fog setting
     vmudn   $v2, $v2, $v29[3]             // multiply reciprocal by +/- 2
-    sh      outputVtxPos, (clipPoly)(clipPolyWrite) // Write pointer to generated vertex to polygon
+    sh      outVtxBase, (clipPoly)(clipPolyWrite) // Write pointer to generated vertex to polygon
     vmadh   $v3, $v3, $v29[3]
     lhu     $11, VTX_CLIP($3)             // Load clip flags for off screen vert
     veq     $v3, $v3, $v31[2]             // 0; if reciprocal high is 0
@@ -1951,7 +1952,7 @@ clip_skipxy:
     sh      $11, VTX_CLIP($3)             // Store modified clip flags for off screen vert
     vrcph   $v24[3], vClDiffI[3]          // reciprocal again (discard result)
 .if CFG_LEGACY_VTX_PIPE && CFG_NO_OCCLUSION_PLANE // New LVP_NOC
-    addi    outputVtxPos, outputVtxPos, -vtxSize // Inc'd by 2, must point to second vtx
+    addi    outVtxBase, outVtxBase, -vtxSize // Inc'd by 2, must point to second vtx
 .endif
     vrcpl   $v23[3], vClDiffF[3]          // frac part
 .if CFG_LEGACY_VTX_PIPE && CFG_NO_OCCLUSION_PLANE // New LVP_NOC
@@ -1991,7 +1992,7 @@ clip_skipxy:
 .endif
     vmrg    vClDiffF, vClDiffF, $v31[1]   // keep frac part of factor, else set to 0xFFFF (max val)
 .if CFG_LEGACY_VTX_PIPE && CFG_NO_OCCLUSION_PLANE // New LVP_NOC
-    move    secondVtxPos, $19             // Old last vtx regs = temp mem
+    move    outVtx2, $19             // Old last vtx regs = temp mem
 .endif
     vsubc   $v29, vClDiffF, vOne[0]       // frac part - 1 for carry
     vge     vClDiffI, vClDiffI, $v31[2]   // 0; If integer part of factor >= 0 (after carry, so overall value >= 0x0000.0001),
@@ -2150,11 +2151,11 @@ Epilogue
 */
 
 vtx_after_dma:
-    andi    inputVtxPos, dmemAddr, 0xFFF8      // Round down input start addr to DMA word
+    andi    inVtx, dmemAddr, 0xFFF8      // Round down input start addr to DMA word
     lhu     $5, geometryModeLabel + 1          // Load middle 2 bytes of geom mode
     srl     $2, cmd_w0, 11                     // n << 1
     sub     $2, cmd_w0, $2                     // = v0 << 1
-    lhu     outputVtxPos, (vertexTable)($2)    // Address of output start
+    lhu     outVtxBase, (vertexTable)($2)    // Address of output start
 .if COUNTER_A_UPPER_VERTEX_COUNT
     sll     $11, $1, 12                        // Vtx count * 0x10000
     add     perfCounterA, perfCounterA, $11    // Add to vertex count
@@ -2245,7 +2246,7 @@ vtx_setup_constants:
 .endif
 
 vtx_after_setup_constants:
-    andi    $8, $5, G_LIGHTING >> 8        // Temp to be reused below, is secondVtxPos
+    andi    $8, $5, G_LIGHTING >> 8        // Temp to be reused below, is outVtx2
     beqz    $8, @@skip_lighting
      li     $16, vtx_loop_no_lighting      // This is clipFlags, but not modified
     li      $16, lt_vtx_pair               // during vtx_store
@@ -2270,9 +2271,9 @@ vtx_after_lt_setup:
     lqv     vM0F,     (mITMatrix + 0x20)($zero)
     lqv     vM2F,     (fourthQWMVP +  0)($zero)
 .if CFG_NO_OCCLUSION_PLANE // New LVP_NOC
-    addi    outputVtxPos, outputVtxPos, -vtxSize // Will inc by 2, but need point to 2nd
+    addi    outVtxBase, outVtxBase, -vtxSize // Will inc by 2, but need point to 2nd
 .else
-    addi    outputVtxPos, outputVtxPos, -2*vtxSize // Going to increment this by 2 verts in loop
+    addi    outVtxBase, outVtxBase, -2*vtxSize // Going to increment this by 2 verts in loop
 .endif
     vcopy   vM1I,  vM0I
     vcopy   vM3I,  vM2I
@@ -2316,7 +2317,7 @@ vtx_after_calc_mit:
     lqv     vVP2I,    (vpMatrix  + 0x10)($zero)
     lqv     vVP0F,    (vpMatrix  + 0x20)($zero)
     lqv     vVP2F,    (vpMatrix  + 0x30)($zero)
-    addi    outputVtxPos, outputVtxPos, -2*vtxSize // Going to increment this by 2 verts in loop
+    addi    outVtxBase, outVtxBase, -2*vtxSize // Going to increment this by 2 verts in loop
     vcopy   vVP1I, vVP0I
     vcopy   vVP3I, vVP2I
     ldv     vVP1I[0], (vpMatrix  + 0x08)($zero)
@@ -2333,21 +2334,21 @@ vtx_after_calc_mit:
     andi    $7, $5, G_FOG >> 8    // Nonzero if fog enabled
 .if CFG_LEGACY_VTX_PIPE && CFG_NO_OCCLUSION_PLANE // New LVP_NOC
     srl     $7, $7, 5  // 8 if G_FOG is set, 0 otherwise
-    addi    $19, rdpCmdBufEndP1, vtxSize  // Temp mem; fog writes up to vtxSize before
+    addi    outVtx1, rdpCmdBufEndP1, vtxSize  // Temp mem; fog writes up to vtxSize before
     jal     while_wait_dma_busy   // Wait for vertex load to finish
-     move   secondVtxPos, $19     // for first pre-loop, same for secondVtxPos
-    ldv     vPairPosI[0], (VTX_IN_OB + 0 * inputVtxSize)(inputVtxPos) // 1st vec pos
-    ldv     vPairPosI[8], (VTX_IN_OB + 1 * inputVtxSize)(inputVtxPos) // 2nd vec pos
-    llv     sTCL[8],      (VTX_IN_CN + 0 * inputVtxSize)(inputVtxPos) // RGBA in 4:5
-    llv     sTCL[12],     (VTX_IN_CN + 1 * inputVtxSize)(inputVtxPos) // RGBA in 6:7
-    llv     vPairST[0],   (VTX_IN_TC + 0 * inputVtxSize)(inputVtxPos) // ST in 0:1
+     move   outVtx2, outVtx1     // for first pre-loop, same for outVtx2
+    ldv     vPairPosI[0], (VTX_IN_OB + 0 * inputVtxSize)(inVtx) // 1st vec pos
+    ldv     vPairPosI[8], (VTX_IN_OB + 1 * inputVtxSize)(inVtx) // 2nd vec pos
+    llv     sTCL[8],      (VTX_IN_CN + 0 * inputVtxSize)(inVtx) // RGBA in 4:5
+    llv     sTCL[12],     (VTX_IN_CN + 1 * inputVtxSize)(inVtx) // RGBA in 6:7
+    llv     vPairST[0],   (VTX_IN_TC + 0 * inputVtxSize)(inVtx) // ST in 0:1
     j       vtx_store_loop_entry
-     llv    vPairST[8],   (VTX_IN_TC + 1 * inputVtxSize)(inputVtxPos) // ST in 4:5
+     llv    vPairST[8],   (VTX_IN_TC + 1 * inputVtxSize)(inVtx) // ST in 4:5
 .else
     jal     while_wait_dma_busy   // Wait for vertex load to finish
-     addi   $19, rdpCmdBufEndP1, tempPrevVtxGarbage  // Temp mem we can freely overwrite replaces outputVtxPos
+     addi   outVtx1, rdpCmdBufEndP1, tempPrevVtxGarbage  // Temp mem we can freely overwrite replaces outVtxBase
     j       vtx_store_loop_entry
-     move   secondVtxPos, $19     // for first pre-loop, same for secondVtxPos
+     move   outVtx2, outVtx1     // for first pre-loop, same for outVtx2
 .endif
 
 .if CFG_LEGACY_VTX_PIPE && CFG_NO_OCCLUSION_PLANE // New LVP_NOC
@@ -2358,9 +2359,9 @@ vtx_after_calc_mit:
 // $v22 = vPairST, $v23:$v24 = vPairTPosF/I/temp, $v25:$v26 = temps, $v27 = vPairRGBA,
 // $v28 = vOne, $v29 = garbage, $v30 = params, $v31 = constants
 // $1: 0x10 vtx count, $2: need for clipping, $3: temp, $4: vtx1/perf,
-// $5: geom mode mid, $6: need for clipping, $7: fog flag, $8: secondVtxPos,
-// $9: clipping / curLight, $10:$11: temp, $12: perf, $13: altBaseReg, $14: inputVtxPos,
-// $15: outputVtxPos, $16: clipping / lt jump addr, $17:$18: clipping, $19: shadow out vtx,
+// $5: geom mode mid, $6: need for clipping, $7: fog flag, $8: outVtx2,
+// $9: clipping / curLight, $10:$11: temp, $12: perf, $13: altBaseReg, $14: inVtx,
+// $15: outVtxBase, $16: clipping / lt jump addr, $17:$18: clipping, $19: outVtx1,
 // $20: temp, $21: clipping / first light, $22:$23: cmd buf, $24: temp, $25: cmd_w0 global,
 // $26: taskDataPtr, $27: inputBufferPos, $28:$30: perf, $ra return addr
 
@@ -2374,7 +2375,7 @@ vtx_loop_no_lighting:
     vmadn   vPairTPosF, vM2F, vPairPosI[2h]
     or      $10, $10, $11          // Combine results for first vertex
     vmadh   vPairTPosI, vM2I, vPairPosI[2h]
-    sh      $10,              (VTX_CLIP      )($19) // Store first vertex flags
+    sh      $10,              (VTX_CLIP      )(outVtx1) // Store first vertex flags
 // sKPI is $v11 // vtx_store Keep Int (keep across pipelining)
 // sKPG is vBBB = $v21 // vtx_store Keep Fog
     vge     sKPG, sKPI, $v31[6]  // Clamp W/fog to >= 0x7F00 (low byte is used)
@@ -2385,10 +2386,10 @@ vtx_loop_no_lighting:
 vtx_return_from_lighting:
 vtx_store_for_clip:
     vmudl   $v29, vPairTPosF, $v30[3]       // Persp norm
-    sub     $20, secondVtxPos, $7           // Points 8 before secondVtxPos if fog, else 0
+    sub     $20, outVtx2, $7           // Points 8 before outVtx2 if fog, else 0
 // s1WI is $v16 // vtx_store 1/W Int
     vmadm   s1WI, vPairTPosI, $v30[3]       // Persp norm
-    addi    outputVtxPos, outputVtxPos, 2*vtxSize // Points to SECOND output vtx
+    addi    outVtxBase, outVtxBase, 2*vtxSize // Points to SECOND output vtx
 // s1WF is $v17 // vtx_store 1/W Frac
     vmadn   s1WF, $v31, $v31[2]             // 0
     sbv     sKPG[15], (VTX_COLOR_A + 8)($20) // In VTX_SCR_Y if fog disabled...
@@ -2397,54 +2398,54 @@ vtx_store_for_clip:
     sbv     sKPG[7],  (VTX_COLOR_A + 8 - vtxSize)($20) // ...which gets overwritten below
 // sSCF is $v20 // vtx_store Scaled Clipping Frac
     vmudn   sSCF, vPairTPosF, $v31[3]        // W * clip ratio for scaled clipping
-    ssv     sCLZ[12], (VTX_SCR_Z      )(secondVtxPos)
+    ssv     sCLZ[12], (VTX_SCR_Z      )(outVtx2)
 // sSCI is $v21 // vtx_store Scaled Clipping Int
     vmadh   sSCI, vPairTPosI, $v31[3]        // W * clip ratio for scaled clipping
-    slv     sKPI[8],  (VTX_SCR_VEC    )(secondVtxPos)
+    slv     sKPI[8],  (VTX_SCR_VEC    )(outVtx2)
     vrcph   $v29[0], s1WI[3]
-    slv     sKPI[0],  (VTX_SCR_VEC    )($19)
+    slv     sKPI[0],  (VTX_SCR_VEC    )(outVtx1)
 // sRTF is $v25 // vtx_store Reciprocal Temp Frac
     vrcpl   sRTF[2], s1WF[3]
-    ssv     sKPF[12], (VTX_SCR_Z_FRAC )(secondVtxPos)
+    ssv     sKPF[12], (VTX_SCR_Z_FRAC )(outVtx2)
 // sRTI is $v26 // vtx_store Reciprocal Temp Int
     vrcph   sRTI[3], s1WI[7]
-    slv     sKPF[2],  (VTX_SCR_Z      )($19)
+    slv     sKPF[2],  (VTX_SCR_Z      )(outVtx1)
     vrcpl   sRTF[6], s1WF[7]
     sra     $24, $1, 31        // All 1s if on last iter
     vrcph   sRTI[7], $v31[2] // 0
     andi    $24, $24, vtxSize  // vtxSize if on last iter, else normally 0
     vch     $v29, vPairTPosI, vPairTPosI[3h] // Clip screen high
-    sub     secondVtxPos, outputVtxPos, $24 // First output vtx on last iter, else second
+    sub     outVtx2, outVtxBase, $24 // First output vtx on last iter, else second
     vcl     $v29, vPairTPosF, vPairTPosF[3h] // Clip screen low
-    addi    $19, outputVtxPos, -vtxSize  // First output vtx always
+    addi    outVtx1, outVtxBase, -vtxSize  // First output vtx always
     vmudl   $v29, s1WF, sRTF[2h]
     cfc2    $10, $vcc                   // Screen clip results
     vmadm   $v29, s1WI, sRTF[2h]
-    sdv     vPairTPosF[8],  (VTX_FRAC_VEC  )(secondVtxPos)
+    sdv     vPairTPosF[8],  (VTX_FRAC_VEC  )(outVtx2)
     vmadn   s1WF, s1WF, sRTI[3h]
 // sTCL is $v19 // vtx_store Temp CoLor
-    ldv     sTCL[0],   (VTX_IN_TC + 2 * inputVtxSize)(inputVtxPos) // ST in 0:1, RGBA in 2:3
+    ldv     sTCL[0],   (VTX_IN_TC + 2 * inputVtxSize)(inVtx) // ST in 0:1, RGBA in 2:3
     vmadh   s1WI, s1WI, sRTI[3h]
-    sdv     vPairTPosF[0],  (VTX_FRAC_VEC  )($19)
+    sdv     vPairTPosF[0],  (VTX_FRAC_VEC  )(outVtx1)
     vch     $v29, vPairTPosI, sSCI[3h] // Clip scaled high
-    lsv     vPairTPosF[14], (VTX_Z_FRAC    )(secondVtxPos) // load Z into W slot, will be for fog below
+    lsv     vPairTPosF[14], (VTX_Z_FRAC    )(outVtx2) // load Z into W slot, will be for fog below
     vmudh   $v29, vOne, $v31[4]  // 4
-    sdv     vPairTPosI[8],  (VTX_INT_VEC   )(secondVtxPos)
+    sdv     vPairTPosI[8],  (VTX_INT_VEC   )(outVtx2)
     vmadn   s1WF, s1WF, $v31[0]  // -4
-    lsv     vPairTPosF[6],  (VTX_Z_FRAC    )($19) // load Z into W slot, will be for fog below
+    lsv     vPairTPosF[6],  (VTX_Z_FRAC    )(outVtx1) // load Z into W slot, will be for fog below
     vmadh   s1WI, s1WI, $v31[0]  // -4
-    sdv     vPairTPosI[0],  (VTX_INT_VEC   )($19)
+    sdv     vPairTPosI[0],  (VTX_INT_VEC   )(outVtx1)
     vmudm   $v29, vPairST, sSTS       // Scale ST
-    ldv     sTCL[8],   (VTX_IN_TC + 3 * inputVtxSize)(inputVtxPos) // ST in 4:5, RGBA in 6:7
+    ldv     sTCL[8],   (VTX_IN_TC + 3 * inputVtxSize)(inVtx) // ST in 4:5, RGBA in 6:7
 // sST2 equ $v11 // vtx_store ST coordinates copy 2
     vmadh   sST2, vOne, $v30          // + 1 * ST offset; elems 0, 1, 4, 5
-    suv     vPairRGBA[4],   (VTX_COLOR_VEC )(secondVtxPos) // Store RGBA for second vtx
+    suv     vPairRGBA[4],   (VTX_COLOR_VEC )(outVtx2) // Store RGBA for second vtx
     vmudl   $v29, s1WF, sRTF[2h]
-    lsv     vPairTPosI[14], (VTX_Z_INT     )(secondVtxPos) // load Z into W slot, will be for fog below
+    lsv     vPairTPosI[14], (VTX_Z_INT     )(outVtx2) // load Z into W slot, will be for fog below
     vmadm   $v29, s1WI, sRTF[2h]
-    suv     vPairRGBA[0],   (VTX_COLOR_VEC )($19) // Store RGBA for first vtx
+    suv     vPairRGBA[0],   (VTX_COLOR_VEC )(outVtx1) // Store RGBA for first vtx
     vmadn   s1WF, s1WF, sRTI[3h]
-    lsv     vPairTPosI[6],  (VTX_Z_INT     )($19) // load Z into W slot, will be for fog below
+    lsv     vPairTPosI[6],  (VTX_Z_INT     )(outVtx1) // load Z into W slot, will be for fog below
     vmadh   s1WI, s1WI, sRTI[3h]
     srl     $24, $10, 4            // Shift second vertex screen clipping to first slots
     vcl     $v29, vPairTPosF, sSCF[3h] // Clip scaled low
@@ -2452,34 +2453,34 @@ vtx_store_for_clip:
     vcopy   vPairST, sTCL
     cfc2    $20, $vcc                   // Scaled clip results
     vmudl   $v29, vPairTPosF, s1WF[3h] // Pos times inv W
-    ssv     s1WF[14],          (VTX_INV_W_FRAC)(secondVtxPos)
+    ssv     s1WF[14],          (VTX_INV_W_FRAC)(outVtx2)
     vmadm   $v29, vPairTPosI, s1WF[3h] // Pos times inv W
 // vPairPosI is $v20
-    ldv     vPairPosI[0], (VTX_IN_OB + 2 * inputVtxSize)(inputVtxPos) // Pos of 1st vector for next iteration
+    ldv     vPairPosI[0], (VTX_IN_OB + 2 * inputVtxSize)(inVtx) // Pos of 1st vector for next iteration
     vmadn   vPairTPosF, vPairTPosF, s1WI[3h]
-    ldv     vPairPosI[8], (VTX_IN_OB + 3 * inputVtxSize)(inputVtxPos) // Pos of 2nd vector on next iteration
+    ldv     vPairPosI[8], (VTX_IN_OB + 3 * inputVtxSize)(inVtx) // Pos of 2nd vector on next iteration
     vmadh   vPairTPosI, vPairTPosI, s1WI[3h] // vPairTPosI:vPairTPosF = pos times inv W
-    addi    inputVtxPos, inputVtxPos, (2 * inputVtxSize) // Advance two positions forward in the input vertices
+    addi    inVtx, inVtx, (2 * inputVtxSize) // Advance two positions forward in the input vertices
     vmov    sTCL[4], vPairST[2] // First vtx RG to elem 4
     andi    $10, $10, CLIP_SCRN_NPXY | CLIP_CAMPLANE // Mask to only screen bits we care about
     vmov    sTCL[5], vPairST[3] // First vtx BA to elem 5
     sll     $11, $20, 4            // Shift first vertex scaled clipping to second slots
     vmudl   $v29, vPairTPosF, $v30[3] // Persp norm
-    ssv     s1WF[6],           (VTX_INV_W_FRAC)($19)
+    ssv     s1WF[6],           (VTX_INV_W_FRAC)(outVtx1)
     vmadm   vPairTPosI, vPairTPosI, $v30[3] // Persp norm
-    ssv     s1WI[14],          (VTX_INV_W_INT )(secondVtxPos)
+    ssv     s1WI[14],          (VTX_INV_W_INT )(outVtx2)
     vmadn   vPairTPosF, $v31, $v31[2] // 0; Now vPairTPosI:vPairTPosF = projected position
-    ssv     s1WI[6],           (VTX_INV_W_INT )($19)
+    ssv     s1WI[6],           (VTX_INV_W_INT )(outVtx1)
     // vnop
-    slv     sST2[8],           (VTX_TC_VEC    )(secondVtxPos) // Store scaled S, T vertex 2
+    slv     sST2[8],           (VTX_TC_VEC    )(outVtx2) // Store scaled S, T vertex 2
     vmudh   $v29, sVPO, vOne // offset * 1
-    slv     sST2[0],           (VTX_TC_VEC    )($19) // Store scaled S, T vertex 1
+    slv     sST2[0],           (VTX_TC_VEC    )(outVtx1) // Store scaled S, T vertex 1
     vmadh   $v29, sFGM, $v31[6] // + (0,0,0,1,0,0,0,1) * 0x7F00
     andi    $20, $20, CLIP_SCAL_NPXY // Mask to only bits we care about
     vmadn   sKPF, vPairTPosF, sVPS   // + pos frac * scale
     or      $24, $24, $20            // Combine results for second vertex
     vmadh   sKPI, vPairTPosI, sVPS   // int part, sKPI:sKPF is now screen space pos
-    sh      $24,               (VTX_CLIP      )(secondVtxPos) // Store second vertex clip flags
+    sh      $24,               (VTX_CLIP      )(outVtx2) // Store second vertex clip flags
 vtx_store_loop_entry:
     vmudn   $v29, vM3F, vOne
     blez    $1, vtx_epilogue
@@ -2496,17 +2497,17 @@ vtx_epilogue:
     vge     sCLZ, sKPI, $v31[2]              // 0; clamp Z to >= 0
     or      $10, $10, $11          // Combine results for first vertex
     beqz    $7, @@skip_fog
-     slv    sKPI[8],  (VTX_SCR_VEC    )(secondVtxPos)
-    sbv     sKPG[15], (VTX_COLOR_A    )(secondVtxPos)
-    sbv     sKPG[7],  (VTX_COLOR_A    )($19)
+     slv    sKPI[8],  (VTX_SCR_VEC    )(outVtx2)
+    sbv     sKPG[15], (VTX_COLOR_A    )(outVtx2)
+    sbv     sKPG[7],  (VTX_COLOR_A    )(outVtx1)
 @@skip_fog:
     vmov    sKPF[1], sCLZ[2]
-    ssv     sCLZ[12], (VTX_SCR_Z      )(secondVtxPos)
-    slv     sKPI[0],  (VTX_SCR_VEC    )($19)
-    ssv     sKPF[12], (VTX_SCR_Z_FRAC )(secondVtxPos)
+    ssv     sCLZ[12], (VTX_SCR_Z      )(outVtx2)
+    slv     sKPI[0],  (VTX_SCR_VEC    )(outVtx1)
+    ssv     sKPF[12], (VTX_SCR_Z_FRAC )(outVtx2)
     bltz    $ra, clip_after_vtx_store  // $ra - from clipping or + from while_wait_dma_busy
-     slv    sKPF[2],  (VTX_SCR_Z      )($19)
-    sh      $10,      (VTX_CLIP       )($19) // Store first vertex flags
+     slv    sKPF[2],  (VTX_SCR_Z      )(outVtx1)
+    sh      $10,      (VTX_CLIP       )(outVtx1) // Store first vertex flags
     j       vertex_end
      lqv    $v30, (v30Value)($zero)    // Restore value overwritten in vtx_store
 
@@ -2527,27 +2528,18 @@ vtx_epilogue:
 // sST2 equ $v11 // vtx_store ST coordinates copy 2
 // vPairPosI is $v20
 
-vtx_loop_no_lighting:
     
     
 vtx_return_from_lighting:
-vtx_store_for_clip:
+    TODO
 
-    
-    
-    
-    
-    
-    
-    
-     
-    
-    
-    
-    
 
+    // 76 cycles, 22 more than NOC.
+    // 8 vu cycles for plane, 8 vu cycles for edges, 1 more vnop than NOC,
+    // 3 branch delay slots with SU instr, 2 lands-after-branch.
+vtx_loop_no_lighting:
     vge     $v29, sOC2, sO47     // Each compare to coeffs 4-7
-    slv     sKPF[2],  (VTX_SCR_Z      )($19)
+    nop
     vmudn   $v29, vM3F, vOne
     cfc2    $20, $vcc
     vmadh   $v29, vM3I, vOne
@@ -2560,192 +2552,142 @@ vtx_store_for_clip:
     andi    $11, $20, 0x00F0 // Bits 4-7 for vtx 2
     vmadh   $v29, vM1I, vPairPosI[1h]
     bnez    $11, @@skipv2    // If nonzero, at least one equation false, don't set occluded flag
-     addi   $1, $1, -2*inputVtxSize         // Decrement vertex count by 2
+     addi   outVtxBase, outVtxBase, 2*vtxSize // Points to SECOND output vtx
     ori     $24, $24, CLIP_OCCLUDED // All equations true, set vtx 2 occluded flag
 @@skipv2:
     vmadn   vPairTPosF, vM2F, vPairPosI[2h]
     andi    $20, $20, 0x000F // Bits 0-3 for vtx 1
     vmadh   vPairTPosI, vM2I, vPairPosI[2h]
     bnez    $20, @@skipv1    // If nonzero, at least one equation false, don't set occluded flag
-     sh     $24,            (VTX_CLIP      )(secondVtxPos) // Store second vertex clip flags
+     sh     $24,            (VTX_CLIP      )(outVtx2) // Store second vertex clip flags
     ori     $10, $10, CLIP_OCCLUDED // All equations true, set vtx 1 occluded flag
 @@skipv1:
     vmudl   $v29, vPairTPosF, $v30[3]       // Persp norm
-    sh      $10,            (VTX_CLIP      )($19) // Store first vertex flags
-    vmadm   s1WI, vPairTPosI, $v30[3]       // Persp norm
-    
-    ldv     sOCM[0], (occlusionPlaneMidCoeffs - altBase)(altBaseReg)
-    ldv     sOCM[8], (occlusionPlaneMidCoeffs - altBase)(altBaseReg)
-    addi    outputVtxPos, outputVtxPos, 2*vtxSize // Points to SECOND output vtx
-    
+    bltz    $ra, clip_after_vtx_store
+vtx_store_for_clip:
+     vmadm  s1WI, vPairTPosI, $v30[3]       // Persp norm
     vmadn   s1WF, $v31, $v31[2]             // 0
+    sh      $10,            (VTX_CLIP      )(outVtx1) // Store first vertex flags
     vmudn   sSCF, vPairTPosF, $v31[3]       // W * clip ratio for scaled clipping
-    vmadh   sSCI, vPairTPosI, $v31[3]       // W * clip ratio for scaled clipping
+    blez    $1, vertex_end  // TODO may need to restore $v30
+     vmadh  sSCI, vPairTPosI, $v31[3]       // W * clip ratio for scaled clipping
     vrcph   $v29[0], s1WI[3]
+    ldv     sOCM[0], (occlusionPlaneMidCoeffs - altBase)(altBaseReg)
     vrcpl   sRTF[2], s1WF[3]
+    ldv     sOCM[8], (occlusionPlaneMidCoeffs - altBase)(altBaseReg)
     vrcph   sRTI[3], s1WI[7]
+    addi    $1, $1, -2*inputVtxSize         // Decrement vertex count by 2
     vrcpl   sRTF[6], s1WF[7]
+    sra     $24, $1, 31        // All 1s if on last iter
     vrcph   sRTI[7], $v31[2] // 0
+    andi    $24, $24, vtxSize  // vtxSize if on last iter, else normally 0
     vmudn   $v29, vPairTPosF, sOCM          // X * kx, Y * ky, Z * kz
+    sub     outVtx2, outVtxBase, $24 // First output vtx on last iter, else second
     vmadh   $v29, vPairTPosI, sOCM          // Int * int
+    addi    outVtx1, outVtxBase, -vtxSize  // First output vtx always
     vreadacc sOC1, ACC_UPPER                // Load int * int portion
+    addi    inVtx, inVtx, (2 * inputVtxSize) // Advance two positions forward in the input vertices
     vch     $v29, vPairTPosI, vPairTPosI[3h] // Clip screen high
+    sdv     vPairTPosI[8],  (VTX_INT_VEC   )(outVtx2)
     vcl     $v29, vPairTPosF, vPairTPosF[3h] // Clip screen low
+    sdv     vPairTPosI[0],  (VTX_INT_VEC   )(outVtx1)
     vmudl   $v29, s1WF, sRTF[2h]
     cfc2    $10, $vcc                   // Screen clip results
     vmadm   $v29, s1WI, sRTF[2h]
-    sdv     vPairTPosF[8],  (VTX_FRAC_VEC  )(secondVtxPos)
+    sdv     vPairTPosF[8],  (VTX_FRAC_VEC  )(outVtx2)
     vmadn   s1WF, s1WF, sRTI[3h]
-    ldv     sTCL[0],   (VTX_IN_TC + 2 * inputVtxSize)(inputVtxPos) // ST in 0:1, RGBA in 2:3
+    ldv     sTCL[0],   (VTX_IN_TC + 0 * inputVtxSize)(inVtx) // ST in 0:1, RGBA in 2:3
     vmadh   s1WI, s1WI, sRTI[3h]
-    sdv     vPairTPosF[0],  (VTX_FRAC_VEC  )($19)
-    veq     $v29, $v31, $v31[3h] // Set VCC to 00010001
-    lsv     vPairTPosF[14], (VTX_Z_FRAC    )(secondVtxPos) // load Z into W slot, will be for fog below
+    sdv     vPairTPosF[0],  (VTX_FRAC_VEC  )(outVtx1)
+    vch     $v29, vPairTPosI, sSCI[3h] // Clip scaled high
+    lsv     vPairTPosF[14], (VTX_Z_FRAC    )(outVtx2) // load Z into W slot, will be for fog below
     vmudh   $v29, vOne, $v31[4]  // 4
-    sdv     vPairTPosI[8],  (VTX_INT_VEC   )(secondVtxPos)
+    suv     vPairRGBA[4],   (VTX_COLOR_VEC )(outVtx2) // Store RGBA for second vtx
     vmadn   s1WF, s1WF, $v31[0]  // -4
-    lsv     vPairTPosF[6],  (VTX_Z_FRAC    )($19) // load Z into W slot, will be for fog below
+    lsv     vPairTPosF[6],  (VTX_Z_FRAC    )(outVtx1) // load Z into W slot, will be for fog below
     vmadh   s1WI, s1WI, $v31[0]  // -4
-    sdv     vPairTPosI[0],  (VTX_INT_VEC   )($19)
-    vmrg    sOC1, sOCM, sOC1  // Put constant factor in elems 3, 7
-    ldv     sTCL[8],   (VTX_IN_TC + 3 * inputVtxSize)(inputVtxPos) // ST in 4:5, RGBA in 6:7
-    vch     $v29, vPairTPosI, sSCI[3h] // Clip scaled high
-    suv     vPairRGBA[4],   (VTX_COLOR_VEC )(secondVtxPos) // Store RGBA for second vtx
+    suv     vPairRGBA[0],   (VTX_COLOR_VEC )(outVtx1) // Store RGBA for first vtx
+    vmudm   $v29, vPairST, sSTS      // Scale ST
+    ldv     sTCL[8],   (VTX_IN_TC + 1 * inputVtxSize)(inVtx) // ST in 4:5, RGBA in 6:7
+    vmadh   sST2, vOne, $v30         // + 1 * ST offset; elems 0, 1, 4, 5
+    lsv     vPairTPosI[14], (VTX_Z_INT     )(outVtx2) // load Z into W slot, will be for fog below
     vmudl   $v29, s1WF, sRTF[2h]
-    lsv     vPairTPosI[14], (VTX_Z_INT     )(secondVtxPos) // load Z into W slot, will be for fog below
+    lsv     vPairTPosI[6],  (VTX_Z_INT     )(outVtx1) // load Z into W slot, will be for fog below
     vmadm   $v29, s1WI, sRTF[2h]
-    suv     vPairRGBA[0],   (VTX_COLOR_VEC )($19) // Store RGBA for first vtx
+    srl     $24, $10, 4            // Shift second vertex screen clipping to first slots
     vmadn   s1WF, s1WF, sRTI[3h]
-    lsv     vPairTPosI[6],  (VTX_Z_INT     )($19) // load Z into W slot, will be for fog below
+    andi    $24, $24, CLIP_SCRN_NPXY | CLIP_CAMPLANE // Mask to only screen bits we care about
     vmadh   s1WI, s1WI, sRTI[3h]
-    srl     $24, $10, 4            // Shift second vertex screen clipping to first slots
+    slv     sST2[8],           (VTX_TC_VEC    )(outVtx2) // Store scaled S, T vertex 2
     vcl     $v29, vPairTPosF, sSCF[3h] // Clip scaled low
-    andi    $24, $24, CLIP_SCRN_NPXY | CLIP_CAMPLANE // Mask to only screen bits we care about
-    vadd    sOC1, sOC1, sOC1[0q] // Add pairs upwards
+    slv     sST2[0],           (VTX_TC_VEC    )(outVtx1) // Store scaled S, T vertex 1
+    vcopy   vPairST, sTCL
     cfc2    $20, $vcc                   // Scaled clip results
     vmudl   $v29, vPairTPosF, s1WF[3h] // Pos times inv W
+    ssv     s1WF[14],          (VTX_INV_W_FRAC)(outVtx2)
     vmadm   $v29, vPairTPosI, s1WF[3h] // Pos times inv W
+    andi    $10, $10, CLIP_SCRN_NPXY | CLIP_CAMPLANE // Mask to only screen bits we care about
     vmadn   vPairTPosF, vPairTPosF, s1WI[3h]
+    ssv     s1WF[6],           (VTX_INV_W_FRAC)(outVtx1)
     vmadh   vPairTPosI, vPairTPosI, s1WI[3h] // vPairTPosI:vPairTPosF = pos times inv W
-    vadd    sOC1, sOC1, sOC1[1h] // Add elems 1, 5 to 3, 7
-    // vnop
+    ssv     s1WI[14],          (VTX_INV_W_INT )(outVtx2)
+    veq     $v29, $v31, $v31[3h] // Set VCC to 00010001
+    ssv     s1WI[6],           (VTX_INV_W_INT )(outVtx1)
+    vmrg    sOC1, sOCM, sOC1  // Put constant factor in elems 3, 7
+    sll     $11, $20, 4            // Shift first vertex scaled clipping to second slots
     vmudl   $v29, vPairTPosF, $v30[3] // Persp norm
+    andi    $20, $20, CLIP_SCAL_NPXY // Mask to only bits we care about
     vmadm   vPairTPosI, vPairTPosI, $v30[3] // Persp norm
+    lqv     sVPO, (tempViewportOffset)(rdpCmdBufEndP1) // Load viewport offset
     vmadn   vPairTPosF, $v31, $v31[2] // 0; Now vPairTPosI:vPairTPosF = projected position
-    // vnop
-    vlt     $v29, sOC1, $v31[2] // Occlusion plane equation < 0 in elems 3, 7
+    lqv     sVPS, (tempViewportScale)(rdpCmdBufEndP1) // Load viewport scale
+    vadd    sOC1, sOC1, sOC1[0q] // Add pairs upwards
+    or      $24, $24, $20            // Combine results for second vertex
+    vmov    sTCL[4], vPairST[2] // First vtx RG to elem 4
+    andi    $11, $11, CLIP_SCAL_NPXY // Mask to only bits we care about
     vmudh   $v29, sVPO, vOne // offset * 1
-    cfc2    $11, $vcc // Load occlusion plane mid results to bits 3 and 7
+    ldv     vPairPosI[0], (VTX_IN_OB + 0 * inputVtxSize)(inVtx) // Pos of 1st vector for next iteration
     vmadn   sKPF, vPairTPosF, sVPS   // + pos frac * scale
+    ldv     vPairPosI[8], (VTX_IN_OB + 1 * inputVtxSize)(inVtx) // Pos of 2nd vector on next iteration
     vmadh   sKPI, vPairTPosI, sVPS   // int part, sKPI:sKPF is now screen space pos
-    vmadh   sFOG, vOne, $v31[6] // + 0x7F00 in all elements, clamp to 0x7FFF for fog
-    vmudm   $v29, vPairST, sSTS       // Scale ST
     ldv     sO03[0], (occlusionPlaneEdgeCoeffs - altBase)(altBaseReg) // Load coeffs 0-3
-    vmadh   sST2, vOne, $v30          // + 1 * ST offset; elems 0, 1, 4, 5
-    ssv     sKPF[12], (VTX_SCR_Z_FRAC )(secondVtxPos)
-    vmudh   sOSC, sKPI, $v31[4]  // 4; scale up x and y
+    vmadh   sFOG, vOne, $v31[6] // + 0x7F00 in all elements, clamp to 0x7FFF for fog
     ldv     sO03[8], (occlusionPlaneEdgeCoeffs - altBase)(altBaseReg) // and for vtx 2
-    vge     sCLZ, sKPI, $v31[2]  // 0; clamp Z to >= 0
+    vadd    sOC1, sOC1, sOC1[1h] // Add elems 1, 5 to 3, 7
+    lqv     sOPM, (tempOccPlusMinus)(rdpCmdBufEndP1) // Load occlusion plane -/+4000 constants
+    vmov    sTCL[5], vPairST[3] // First vtx BA to elem 5
     ldv     sO47[0], (occlusionPlaneEdgeCoeffs + 8 - altBase)(altBaseReg) // Load coeffs 4-7
-    vge     sFOG, sFOG, $v31[6]  // 0x7F00; clamp fog to >= 0 (want low byte only)
+    vmudh   sOSC, sKPI, $v31[4]   // 4; scale up x and y
     ldv     sO47[8], (occlusionPlaneEdgeCoeffs + 8 - altBase)(altBaseReg) // and for vtx 2
-    vmulf   $v29, sOPM, sKPI[1h] // -0x4000*Y1, --, +0x4000*Y1, --, repeat vtx 2
-    sub     $20, secondVtxPos, $7 // Points 8 before secondVtxPos if fog, else 0
+    vge     sCLZ, sKPI, $v31[2]   // 0; clamp Z to >= 0
+    or      $10, $10, $11         // Combine results for first vertex
+    vge     sFOG, sFOG, $v31[6]   // 0x7F00; clamp fog to >= 0 (want low byte only)
+    sub     $20, outVtx2, $7      // Points 8 before outVtx2 if fog, else 0
+    vlt     $v29, sOC1, $v31[2]   // Occlusion plane equation < 0 in elems 3, 7
+    ssv     sKPF[12], (VTX_SCR_Z_FRAC )(outVtx2)
+    vmulf   $v29, sOPM, sKPI[1h]  // -0x4000*Y1, --, +0x4000*Y1, --, repeat vtx 2
+    cfc2    $11, $vcc // Load occlusion plane mid results to bits 3 and 7
     vmacf   sOC2, sO03, sOSC[0h]  //    4*X1*c0, --,    4*X1*c2, --, repeat vtx 2
-    sub     $3, $19, $7           // Points 8 before $19 if fog, else 0
+    ssv     sCLZ[12], (VTX_SCR_Z      )(outVtx2)
     vmulf   $v29, sOPM, sKPI[0h]  // --, -0x4000*X1, --, +0x4000*X1, repeat vtx 2
-    ssv     sCLZ[12], (VTX_SCR_Z      )(secondVtxPos)
+    sub     $3, outVtx1, $7       // Points 8 before outVtx1 if fog, else 0
     vmacf   sOC3, sO03, sOSC[1h]  // --,    4*Y1*c1, --,    4*Y1*c3, repeat vtx 2
     sbv     sFOG[15], (VTX_COLOR_A + 8)($20) // In VTX_SCR_Y if fog disabled...
     vmov    sKPF[1], sCLZ[2]
-    slv     sKPI[8],  (VTX_SCR_VEC    )(secondVtxPos)
+    slv     sKPI[8],  (VTX_SCR_VEC    )(outVtx2)
     // vnop
     sbv     sFOG[7],  (VTX_COLOR_A + 8)($3) // ...which gets overwritten below
-    veq     $v29, $v31, $v31[0q] // Set VCC to 10101010
-    blez    $1, vtx_epilogue
-     vmrg   sOC2, sOC2, sOC3     // Elems 0-3 are results for vtx 0, 4-7 for vtx 1
-    // vnop
-    jr      $16                    // lt_vtx_pair or vtx_loop_no_lighting
-     slv    sKPI[0],  (VTX_SCR_VEC    )($19)
-
-
-    
-    
-    
-    
-    
-    
-
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    sra     $24, $1, 31        // All 1s if on last iter
-    andi    $24, $24, vtxSize  // vtxSize if on last iter, else normally 0
-    sub     secondVtxPos, outputVtxPos, $24 // First output vtx on last iter, else second
-    addi    $19, outputVtxPos, -vtxSize  // First output vtx always
-    
-    
-    ssv     s1WF[14],          (VTX_INV_W_FRAC)(secondVtxPos)
-    ldv     vPairPosI[0], (VTX_IN_OB + 2 * inputVtxSize)(inputVtxPos) // Pos of 1st vector for next iteration
-    ldv     vPairPosI[8], (VTX_IN_OB + 3 * inputVtxSize)(inputVtxPos) // Pos of 2nd vector on next iteration
-    addi    inputVtxPos, inputVtxPos, (2 * inputVtxSize) // Advance two positions forward in the input vertices
-    andi    $10, $10, CLIP_SCRN_NPXY | CLIP_CAMPLANE // Mask to only screen bits we care about
-    sll     $11, $20, 4            // Shift first vertex scaled clipping to second slots
-    ssv     s1WF[6],           (VTX_INV_W_FRAC)($19)
-    ssv     s1WI[14],          (VTX_INV_W_INT )(secondVtxPos)
-    ssv     s1WI[6],           (VTX_INV_W_INT )($19)
-    slv     sST2[8],           (VTX_TC_VEC    )(secondVtxPos) // Store scaled S, T vertex 2
-    slv     sST2[0],           (VTX_TC_VEC    )($19) // Store scaled S, T vertex 1
-    andi    $20, $20, CLIP_SCAL_NPXY // Mask to only bits we care about
-    or      $24, $24, $20            // Combine results for second vertex
-    
-    
+    veq     $v29, $v31, $v31[0q]  // Set VCC to 10101010
     sdv     sTCL[8],      (tempVpRGBA)(rdpCmdBufEndP1) // Vtx 0 and 1 RGBA in order
-    
-    
-    andi    $11, $11, CLIP_SCAL_NPXY // Mask to only bits we care about
-    or      $10, $10, $11          // Combine results for first vertex
-    
-
-    
-    
-    lqv     sOPM, (tempOccPlusMinus)(rdpCmdBufEndP1) // Load occlusion plane -/+4000 constants
-    
-
-
-vtx_epilogue:
-    vge     sKPG, sKPI, $v31[6]  // Clamp W/fog to >= 0x7F00 (low byte is used)
-    andi    $11, $11, CLIP_SCAL_NPXY // Mask to only bits we care about
-    vge     sCLZ, sKPI, $v31[2]              // 0; clamp Z to >= 0
-    or      $10, $10, $11          // Combine results for first vertex
-    beqz    $7, @@skip_fog
-     slv    sKPI[8],  (VTX_SCR_VEC    )(secondVtxPos)
-    sbv     sKPG[15], (VTX_COLOR_A    )(secondVtxPos)
-    sbv     sKPG[7],  (VTX_COLOR_A    )($19)
-@@skip_fog:
-    vmov    sKPF[1], sCLZ[2]
-    ssv     sCLZ[12], (VTX_SCR_Z      )(secondVtxPos)
-    slv     sKPI[0],  (VTX_SCR_VEC    )($19)
-    ssv     sKPF[12], (VTX_SCR_Z_FRAC )(secondVtxPos)
-    bltz    $ra, clip_after_vtx_store  // $ra - from clipping or + from while_wait_dma_busy
-     slv    sKPF[2],  (VTX_SCR_Z      )($19)
-    sh      $10,      (VTX_CLIP       )($19) // Store first vertex flags
-    j       vertex_end
-     lqv    $v30, (v30Value)($zero)    // Restore value overwritten in vtx_store
+    vmrg    sOC2, sOC2, sOC3      // Elems 0-3 are results for vtx 0, 4-7 for vtx 1
+    slv     sKPI[0],  (VTX_SCR_VEC    )(outVtx1)
+    // vnop
+    jr      $ra                   // lt_vtx_pair or vtx_loop_no_lighting
+     slv    sKPF[2],  (VTX_SCR_Z      )(outVtx1)
 
+vertex_end:
+    j      run_next_DL_command
+     lqv   $v30, (v30Value)($zero)           // Restore value overwritten in vtx_store
 
 
 .endif
@@ -2788,55 +2730,55 @@ vtx_return_from_lighting:
     vmudm   $v29, vPairST, sSTS         // Scale ST; must be after texgen
     vmadh   vPairST, sSTO, vOne         // + 1 * (ST offset or zero)
 .endif
-    addi    outputVtxPos, outputVtxPos, 2*vtxSize
+    addi    outVtxBase, outVtxBase, 2*vtxSize
 vtx_store_for_clip:
     // Inputs: vPairTPosI, vPairTPosF, vPairST, vPairRGBA
     // Locals: $v20, $v21, $v25, $v26, $v16, $v17 ($v29 is temp). Also vPairST and
     // vPairRGBA can be used as temps once stored ($v22, $v27).
-    // Scalar regs: secondVtxPos, outputVtxPos; set to the same thing if only write 1 vtx
+    // Scalar regs: outVtx2, outVtxBase; set to the same thing if only write 1 vtx
     // temps $10, $11, $20, $24
     vmudl   $v29, vPairTPosF, $v30[3] // Persp norm
-    move    secondVtxPos, outputVtxPos          // Second and output vertices write to same mem...
+    move    outVtx2, outVtxBase          // Second and output vertices write to same mem...
     vmadm   s1WI, vPairTPosI, $v30[3] // Persp norm
     bltz    $1, @@skipsecond                    // ...if < 0 verts remain, ...
      vmadn  s1WF, $v31, $v31[2] // 0
-    addi    secondVtxPos, outputVtxPos, vtxSize // ...otherwise, second vtx is next vtx
+    addi    outVtx2, outVtxBase, vtxSize // ...otherwise, second vtx is next vtx
 @@skipsecond:
     vch     $v29, vPairTPosI, vPairTPosI[3h] // Clip screen high
-    suv     vPairRGBA[4],     (VTX_COLOR_VEC )(secondVtxPos)
+    suv     vPairRGBA[4],     (VTX_COLOR_VEC )(outVtx2)
     vcl     $v29, vPairTPosF, vPairTPosF[3h] // Clip screen low
-    suv     vPairRGBA[0],     (VTX_COLOR_VEC )(outputVtxPos)
+    suv     vPairRGBA[0],     (VTX_COLOR_VEC )(outVtxBase)
     vrcph   $v29[0], s1WI[3]
     cfc2    $10, $vcc // Load screen clipping results
     vrcpl   sRTF[2], s1WF[3]
-    sdv     vPairTPosF[8],    (VTX_FRAC_VEC  )(secondVtxPos)
+    sdv     vPairTPosF[8],    (VTX_FRAC_VEC  )(outVtx2)
     vrcph   sRTI[3], s1WI[7]
-    move    $19, outputVtxPos  // Else $19 is initialized to temp memory on first pre-loop
+    move    outVtx1, outVtxBase  // Else outVtx1 is initialized to temp memory on first pre-loop
     vrcpl   sRTF[6], s1WF[7]
-    sdv     vPairTPosF[0],    (VTX_FRAC_VEC  )(outputVtxPos)
+    sdv     vPairTPosF[0],    (VTX_FRAC_VEC  )(outVtxBase)
     vrcph   sRTI[7], $v31[2] // 0
-    sdv     vPairTPosI[8],    (VTX_INT_VEC   )(secondVtxPos)
+    sdv     vPairTPosI[8],    (VTX_INT_VEC   )(outVtx2)
     vmudn   sSCF, vPairTPosF, $v31[3] // W * clip ratio for scaled clipping
-    sdv     vPairTPosI[0],    (VTX_INT_VEC   )(outputVtxPos)
+    sdv     vPairTPosI[0],    (VTX_INT_VEC   )(outVtxBase)
     vmadh   sSCI, vPairTPosI, $v31[3] // W * clip ratio for scaled clipping
-    slv     vPairST[8],       (VTX_TC_VEC    )(secondVtxPos)
+    slv     vPairST[8],       (VTX_TC_VEC    )(outVtx2)
     vmudl   $v29, s1WF, sRTF[2h]
-    slv     vPairST[0],       (VTX_TC_VEC    )(outputVtxPos)
+    slv     vPairST[0],       (VTX_TC_VEC    )(outVtxBase)
     vmadm   $v29, s1WI, sRTF[2h]
 
 .if CFG_NO_OCCLUSION_PLANE
     vmadn   s1WF, s1WF, sRTI[3h]
-    addi    inputVtxPos, inputVtxPos, 2*inputVtxSize
+    addi    inVtx, inVtx, 2*inputVtxSize
     vmadh   s1WI, s1WI, sRTI[3h]
 vtx_store_loop_entry:
 // vPairST is $v22
-    ldv     vPairST[0],   (VTX_IN_TC + inputVtxSize * 0)(inputVtxPos) // ST in 0:1, RGBA in 2:3
+    ldv     vPairST[0],   (VTX_IN_TC + inputVtxSize * 0)(inVtx) // ST in 0:1, RGBA in 2:3
     vch     $v29, vPairTPosI, sSCI[3h] // Clip scaled high
-    ldv     vPairST[8],   (VTX_IN_TC + inputVtxSize * 1)(inputVtxPos) // ST in 4:5, RGBA in 6:7
+    ldv     vPairST[8],   (VTX_IN_TC + inputVtxSize * 1)(inVtx) // ST in 4:5, RGBA in 6:7
     vmudh   $v29, vOne, $v31[4] // 4 * 1 in elems 3, 7
-    lsv     vPairTPosI[14], (VTX_Z_INT     )(secondVtxPos) // load Z into W slot, will be for fog below
+    lsv     vPairTPosI[14], (VTX_Z_INT     )(outVtx2) // load Z into W slot, will be for fog below
     vmadn   s1WF, s1WF, $v31[0] // -4
-    lsv     vPairTPosI[6],  (VTX_Z_INT     )($19) // load Z into W slot, will be for fog below
+    lsv     vPairTPosI[6],  (VTX_Z_INT     )(outVtx1) // load Z into W slot, will be for fog below
     vmadh   s1WI, s1WI, $v31[0] // -4
     srl     $24, $10, 4            // Shift second vertex screen clipping to first slots
     vcl     $v29, vPairTPosF, sSCF[3h] // Clip scaled low
@@ -2845,26 +2787,26 @@ vtx_store_loop_entry:
     vcopy   sTCL, vPairST
     cfc2    $20, $vcc // Load scaled clipping results
     vmudl   $v29, s1WF, sRTF[2h]
-    lsv     vPairTPosF[14], (VTX_Z_FRAC    )(secondVtxPos) // load Z into W slot, will be for fog below
+    lsv     vPairTPosF[14], (VTX_Z_FRAC    )(outVtx2) // load Z into W slot, will be for fog below
     vmadm   $v29, s1WI, sRTF[2h]
-    lsv     vPairTPosF[6],  (VTX_Z_FRAC    )($19) // load Z into W slot, will be for fog below
+    lsv     vPairTPosF[6],  (VTX_Z_FRAC    )(outVtx1) // load Z into W slot, will be for fog below
     vmadn   s1WF, s1WF, sRTI[3h]
 // vPairPosI is $v20
-    ldv     vPairPosI[0], (VTX_IN_OB + inputVtxSize * 0)(inputVtxPos)
+    ldv     vPairPosI[0], (VTX_IN_OB + inputVtxSize * 0)(inVtx)
     vmadh   s1WI, s1WI, sRTI[3h] // s1WI:s1WF is 1/W
-    ldv     vPairPosI[8], (VTX_IN_OB + inputVtxSize * 1)(inputVtxPos)
+    ldv     vPairPosI[8], (VTX_IN_OB + inputVtxSize * 1)(inVtx)
     vmov    sTCL[4], vPairST[2]
     andi    $10, $10, CLIP_SCRN_NPXY | CLIP_CAMPLANE // Mask to only screen bits we care about
     vmov    sTCL[5], vPairST[3]
     ori     $10, $10, CLIP_VTX_USED // Write for all first verts, only matters for generated verts
     vmudl   $v29, vPairTPosF, s1WF[3h]
-    ssv     s1WF[14],         (VTX_INV_W_FRAC)(secondVtxPos)
+    ssv     s1WF[14],         (VTX_INV_W_FRAC)(outVtx2)
     vmadm   $v29, vPairTPosI, s1WF[3h]
-    ssv     s1WF[6],          (VTX_INV_W_FRAC)($19)
+    ssv     s1WF[6],          (VTX_INV_W_FRAC)(outVtx1)
     vmadn   vPairTPosF, vPairTPosF, s1WI[3h]
-    ssv     s1WI[14],         (VTX_INV_W_INT )(secondVtxPos)
+    ssv     s1WI[14],         (VTX_INV_W_INT )(outVtx2)
     vmadh   vPairTPosI, vPairTPosI, s1WI[3h] // pos * 1/W
-    ssv     s1WI[6],          (VTX_INV_W_INT )($19)
+    ssv     s1WI[6],          (VTX_INV_W_INT )(outVtx1)
     // vnop
     sdv     sTCL[8],      (tempVpRGBA)(rdpCmdBufEndP1) // Vtx 0 and 1 RGBA
     // vnop
@@ -2900,24 +2842,24 @@ vtx_store_loop_entry:
     vmadn   vPairTPosF, vPairTPosF, sVPS // + XYZ * scale
     or      $10, $10, $11          // Combine results for first vertex
     vmadh   vPairTPosI, vPairTPosI, sVPS
-    sh      $24,              (VTX_CLIP      )(secondVtxPos) // Store second vertex clip flags
+    sh      $24,              (VTX_CLIP      )(outVtx2) // Store second vertex clip flags
 // sFOG is $v25
     vmadh   sFOG, vOne, $v31[6] // + 0x7F00 in all elements, clamp to 0x7FFF for fog
 .if !CFG_LEGACY_VTX_PIPE
     sdv     sTPN[0],          (tempVpPkNorm)(rdpCmdBufEndP1) // Vtx 0 and 1 packed normals
 .endif
     // vnop
-    sh      $10,              (VTX_CLIP      )($19)          // Store first vertex results
+    sh      $10,              (VTX_CLIP      )(outVtx1)          // Store first vertex results
 // vPairNrml is $v16
     vmudn   vPairNrml, vPairRGBA, $v31[3] // 2; left shift RGBA without clamp; vtx pair normals
-    ssv     vPairTPosF[12],   (VTX_SCR_Z_FRAC)(secondVtxPos)
+    ssv     vPairTPosF[12],   (VTX_SCR_Z_FRAC)(outVtx2)
 // sCLZ is $v21 // vtx_store CLamped Z
     vge     sCLZ, vPairTPosI, $v31[2] // 0; clamp Z to >= 0
-    ssv     vPairTPosF[4],    (VTX_SCR_Z_FRAC)($19)
+    ssv     vPairTPosF[4],    (VTX_SCR_Z_FRAC)(outVtx1)
     vge     sFOG, sFOG, $v31[6] // 0x7F00; clamp fog to >= 0 (want low byte only)
-    slv     vPairTPosI[8],    (VTX_SCR_VEC   )(secondVtxPos)
+    slv     vPairTPosI[8],    (VTX_SCR_VEC   )(outVtx2)
     vmudn   $v29, vM3F, vOne
-    slv     vPairTPosI[0],    (VTX_SCR_VEC   )($19)
+    slv     vPairTPosI[0],    (VTX_SCR_VEC   )(outVtx1)
     vmadh   $v29, vM3I, vOne
     blez    $1, skip_return_to_lt_or_loop  // $ra left as vertex_end or clipping
      vmadn  $v29, vM0F, vPairPosI[0h]
@@ -2926,17 +2868,17 @@ skip_return_to_lt_or_loop:
     vmadh   $v29, vM0I, vPairPosI[0h]
     addi    $1, $1, -2*inputVtxSize     // Counter of remaining verts * inputVtxSize
     vmadn   $v29, vM1F, vPairPosI[1h]
-    ssv     sCLZ[12],         (VTX_SCR_Z     )(secondVtxPos)
+    ssv     sCLZ[12],         (VTX_SCR_Z     )(outVtx2)
     vmadh   $v29, vM1I, vPairPosI[1h]
-    ssv     sCLZ[4],          (VTX_SCR_Z     )($19)
+    ssv     sCLZ[4],          (VTX_SCR_Z     )(outVtx1)
 // sOUTF = vPairPosF is $v21, or vPairTPosF is $v23
     vmadn   sOUTF, vM2F, vPairPosI[2h] // vPairPosI/F = vertices world coords
     beqz    $7, return_and_end_mat // fog disabled
 // sOUTI = vPairPosI is $v20, or vPairTPosI is $v24
      vmadh  sOUTI, vM2I, vPairPosI[2h] // or vPairTPosI/F = vertices clip coords
-    sbv     sFOG[15],         (VTX_COLOR_A   )(secondVtxPos)
+    sbv     sFOG[15],         (VTX_COLOR_A   )(outVtx2)
     jr      $ra
-     sbv    sFOG[7],          (VTX_COLOR_A   )($19)
+     sbv    sFOG[7],          (VTX_COLOR_A   )(outVtx1)
     
 .else // CFG_NO_OCCLUSION_PLANE
     
@@ -2955,17 +2897,17 @@ skip_return_to_lt_or_loop:
     vmadn   s1WF, s1WF, $v31[0] // -4
     ori     $10, $10, CLIP_VTX_USED // Write for all first verts, only matters for generated verts
     vmadh   s1WI, s1WI, $v31[0] // -4
-    addi    inputVtxPos, inputVtxPos, 2*inputVtxSize
+    addi    inVtx, inVtx, 2*inputVtxSize
     vmudn   $v29, vPairTPosF, sOCM // X * kx, Y * ky, Z * kz
     vmadh   $v29, vPairTPosI, sOCM // Int * int
-    lsv     vPairTPosF[14], (VTX_Z_FRAC    )(secondVtxPos) // load Z into W slot, will be for fog below
+    lsv     vPairTPosF[14], (VTX_Z_FRAC    )(outVtx2) // load Z into W slot, will be for fog below
 // sOC1 is $v21 // vtx_store OCclusion temp 1
     vreadacc sOC1, ACC_UPPER // Load int * int portion
-    lsv     vPairTPosF[6],  (VTX_Z_FRAC    )(outputVtxPos) // load Z into W slot, will be for fog below
+    lsv     vPairTPosF[6],  (VTX_Z_FRAC    )(outVtxBase) // load Z into W slot, will be for fog below
     vmudl   $v29, s1WF, sRTF[2h]
-    lsv     vPairTPosI[14], (VTX_Z_INT     )(secondVtxPos) // load Z into W slot, will be for fog below
+    lsv     vPairTPosI[14], (VTX_Z_INT     )(outVtx2) // load Z into W slot, will be for fog below
     vmadm   $v29, s1WI, sRTF[2h]
-    lsv     vPairTPosI[6],  (VTX_Z_INT     )(outputVtxPos) // load Z into W slot, will be for fog below
+    lsv     vPairTPosI[6],  (VTX_Z_INT     )(outVtxBase) // load Z into W slot, will be for fog below
     vmadn   s1WF, s1WF, sRTI[3h]
     sll     $11, $20, 4            // Shift first vertex scaled clipping to second slots
     vmadh   s1WI, s1WI, sRTI[3h] // s1WI:s1WF is 1/W
@@ -2977,13 +2919,13 @@ vtx_store_loop_entry:
     move    $ra, $16                    // Normally $ra = loop or lighting
 skip_return_to_lt_or_loop:
     vmudl   $v29, vPairTPosF, s1WF[3h]  // W must be overwritten with Z before here
-    ssv     s1WF[14],         (VTX_INV_W_FRAC)(secondVtxPos)
+    ssv     s1WF[14],         (VTX_INV_W_FRAC)(outVtx2)
     vmadm   $v29, vPairTPosI, s1WF[3h]
-    ssv     s1WF[6],          (VTX_INV_W_FRAC)($19)
+    ssv     s1WF[6],          (VTX_INV_W_FRAC)(outVtx1)
     vmadn   vPairTPosF, vPairTPosF, s1WI[3h]
-    ssv     s1WI[14],         (VTX_INV_W_INT )(secondVtxPos)
+    ssv     s1WI[14],         (VTX_INV_W_INT )(outVtx2)
     vmadh   vPairTPosI, vPairTPosI, s1WI[3h] // pos * 1/W
-    ssv     s1WI[6],          (VTX_INV_W_INT )($19)
+    ssv     s1WI[6],          (VTX_INV_W_INT )(outVtx1)
     vadd    sOC1, sOC1, sOC1[0q] // Add pairs upwards
 .if !CFG_LEGACY_VTX_PIPE
 // sVPO is $v17 // vtx_store ViewPort Offset
@@ -2998,14 +2940,14 @@ skip_return_to_lt_or_loop:
 .endif
     vmudl   $v29, vPairTPosF, $v30[3] // Persp norm
 // vPairST is $v22
-    ldv     vPairST[0],   (VTX_IN_TC + inputVtxSize * 0)(inputVtxPos) // ST in 0:1, RGBA in 2:3
+    ldv     vPairST[0],   (VTX_IN_TC + inputVtxSize * 0)(inVtx) // ST in 0:1, RGBA in 2:3
     vmadm   vPairTPosI, vPairTPosI, $v30[3] // Persp norm
-    ldv     vPairST[8],   (VTX_IN_TC + inputVtxSize * 1)(inputVtxPos) // ST in 4:5, RGBA in 6:7
+    ldv     vPairST[8],   (VTX_IN_TC + inputVtxSize * 1)(inVtx) // ST in 4:5, RGBA in 6:7
     vmadn   vPairTPosF, $v31, $v31[2] // 0
 // vPairPosI is $v20
-    ldv     vPairPosI[0],      (VTX_IN_OB + inputVtxSize * 0)(inputVtxPos)
+    ldv     vPairPosI[0],      (VTX_IN_OB + inputVtxSize * 0)(inVtx)
     vadd    sOC1, sOC1, sOC1[1h] // Add elems 1, 5 to 3, 7
-    ldv     vPairPosI[8],      (VTX_IN_OB + inputVtxSize * 1)(inputVtxPos)
+    ldv     vPairPosI[8],      (VTX_IN_OB + inputVtxSize * 1)(inVtx)
     // vnop
 // sO03 is $v26 // vtx_store Occlusion coeffs 0-3
     ldv     sO03[0], (occlusionPlaneEdgeCoeffs - altBase)(altBaseReg) // Load coeffs 0-3
@@ -3034,12 +2976,12 @@ skip_return_to_lt_or_loop:
     cfc2    $11, $vcc // Load occlusion plane mid results to bits 3 and 7
 // sOSC is $v21 // vtx_store Occlusion SCaled up
     vmudh   sOSC, vPairTPosI, $v31[4] // 4; scale up x and y
-    ssv     vPairTPosF[12],   (VTX_SCR_Z_FRAC)(secondVtxPos)
+    ssv     vPairTPosF[12],   (VTX_SCR_Z_FRAC)(outVtx2)
     vge     sFOG, sFOG, $v31[6] // 0x7F00; clamp fog to >= 0 (want low byte only)
     or      $24, $24, $20          // Combine results for second vertex
 // sCLZ is $v25 // vtx_store CLamped Z
     vge     sCLZ, vPairTPosI, $v31[2] // 0; clamp Z to >= 0
-    ssv     vPairTPosF[4],    (VTX_SCR_Z_FRAC)($19)
+    ssv     vPairTPosF[4],    (VTX_SCR_Z_FRAC)(outVtx1)
     vmulf   $v29, sOPM, vPairTPosI[1h] // -0x4000*Y1, --, +0x4000*Y1, --, repeat vtx 2
 // sO47 is $v23 // vtx_store Occlusion coeffs 0-3; $v23 = vPairTPosF
     ldv     sO47[0], (occlusionPlaneEdgeCoeffs + 8 - altBase)(altBaseReg) // Load coeffs 4-7
@@ -3050,12 +2992,12 @@ skip_return_to_lt_or_loop:
     beqz    $7, @@skipfog // fog disabled
 // sOC3 is $v21 // vtx_store OCclusion temp 3
      vmacf  sOC3, sO03, sOSC[1h]       // --,    4*Y1*c1, --,    4*Y1*c3, repeat vtx 2
-    sbv     sFOG[15],         (VTX_COLOR_A   )(secondVtxPos)
-    sbv     sFOG[7],          (VTX_COLOR_A   )($19)
+    sbv     sFOG[15],         (VTX_COLOR_A   )(outVtx2)
+    sbv     sFOG[7],          (VTX_COLOR_A   )(outVtx1)
 @@skipfog:
-    slv     vPairTPosI[8],    (VTX_SCR_VEC   )(secondVtxPos)
+    slv     vPairTPosI[8],    (VTX_SCR_VEC   )(outVtx2)
     veq     $v29, $v31, $v31[0q]       // Set VCC to 10101010
-    slv     vPairTPosI[0],    (VTX_SCR_VEC   )($19)
+    slv     vPairTPosI[0],    (VTX_SCR_VEC   )(outVtx1)
     vmrg    sOC2, sOC2, sOC3           // Elems 0-3 are results for vtx 0, 4-7 for vtx 1
 .if CFG_LEGACY_VTX_PIPE
     lpv     $v14[7],          (tempVpRGBA - 8)(rdpCmdBufEndP1) // Y to elem 0, 4
@@ -3063,7 +3005,7 @@ skip_return_to_lt_or_loop:
     sdv     sTPN[0],          (tempVpPkNorm)(rdpCmdBufEndP1) // Vtx 0 and 1 packed normals
 .endif
     // vnop
-    ssv     sCLZ[12],         (VTX_SCR_Z     )(secondVtxPos)
+    ssv     sCLZ[12],         (VTX_SCR_Z     )(outVtx2)
     // vnop
 .if CFG_LEGACY_VTX_PIPE
     lpv     $v15[6],          (tempVpRGBA - 8)(rdpCmdBufEndP1) // Z to elem 0, 4
@@ -3071,7 +3013,7 @@ skip_return_to_lt_or_loop:
     addi    $1, $1, -2*inputVtxSize     // Counter of remaining verts * inputVtxSize
 .endif
     // vnop
-    ssv     sCLZ[4],          (VTX_SCR_Z     )($19)
+    ssv     sCLZ[4],          (VTX_SCR_Z     )(outVtx1)
     vge     $v29, sOC2, sO47           // Each compare to coeffs 4-7
 // vPairNrml is $v16
     lpv     vPairNrml[0],     (tempVpRGBA)(rdpCmdBufEndP1) // Vtx pair normals
@@ -3094,13 +3036,13 @@ skip_return_to_lt_or_loop:
 // sOUTF = vPairPosF is $v21, or vPairTPosF is $v23
     vmadn   sOUTF, vM2F, vPairPosI[2h] // vPairPosI/F = vertices world coords
     bnez    $20, @@skipv1    // If nonzero, at least one equation false, don't set occluded flag
-     sh     $24,              (VTX_CLIP      )(secondVtxPos) // Store second vertex clip flags
+     sh     $24,              (VTX_CLIP      )(outVtx2) // Store second vertex clip flags
     ori     $10, $10, CLIP_OCCLUDED // All equations true, set vtx 1 occluded flag
 @@skipv1:    
 // sOUTI = vPairPosI is $v20, or vPairTPosI is $v24
     vmadh   sOUTI, vM2I, vPairPosI[2h] // or vPairTPosI/F = vertices clip coords
     jr      $ra
-     sh     $10,              (VTX_CLIP      )($19)          // Store first vertex results
+     sh     $10,              (VTX_CLIP      )(outVtx1)          // Store first vertex results
 
 .endif // CFG_NO_OCCLUSION_PLANE
 
@@ -3722,7 +3664,7 @@ lt_post:
     vge     sKPG, sKPI, $v31[6]  // Clamp W/fog to >= 0x7F00 (low byte is used)
     lpv     ltLookAt[0], (xfrmLookatDirs + 0)($zero) // Lookat 0 in 0-2, 1 in 4-6; = vNrmOut
     vge     sCLZ, sKPI, $v31[2]              // 0; clamp Z to >= 0
-    sh      $10, (VTX_CLIP      )($19) // Store first vertex flags
+    sh      $10, (VTX_CLIP      )(outVtx1) // Store first vertex flags
     vne     $v29, $v31, $v31[3h]           // Set VCC to 11101110
     beqz    $17, vtx_return_from_lighting
      vmrg   vPairRGBA, vPairLt, vPairRGBA  // RGB = light, A = vtx alpha