diff --git a/f3dex3.s b/f3dex3.s
index 88039fd..f58a961 100644
--- a/f3dex3.s
+++ b/f3dex3.s
@@ -2102,6 +2102,53 @@ ovl3_padded_end:
 .orga max(max(ovl2_padded_end - ovl2_start, ovl4_padded_end - ovl4_start) + orga(ovl3_start), orga())
 ovl234_end:
 
+/*
+
+Vertex load:
+
+Prepare and trigger DMA
+Post DMA register setup
+
+-> Clipping
+Set up constants
+Clipping ->
+
+Check and recompute MVP
+
+If lighting is disabled, goto setup_vtx_store
+
+Check lighting mode, jump to one lighting overlay or the other entrypoint
+
+Lighting setup (e.g. transform directions)
+
+Call while_wait_dma_busy
+
+Lighting only on all verts
+
+lw v2.rgba
+lw v1.st
+sw v2.rgba -> v1.st
+lpv, lpv, lpv
+
+end:
+suv v1.rgba
+lw fake v1.st
+sw real v1.st
+sw v2.rgba
+
+setup_vtx_store:
+Load MVP
+Call while_wait_dma_busy (a second time if the lighting path already called it)
+Preload first vertex info
+
+-> Clipping
+Vertex write loop
+Clipping ->
+
+Epilogue
+
+*/
+
 vtx_after_dma:
     andi    inputVtxPos, dmemAddr, 0xFFF8      // Round down input start addr to DMA word
     lhu     $5, geometryModeLabel + 1          // Load middle 2 bytes of geom mode
@@ -2113,38 +2160,73 @@ vtx_after_dma:
     add     perfCounterA, perfCounterA, $11    // Add to vertex count
 .endif
 vtx_setup_constants:
+
+.if CFG_LEGACY_VTX_PIPE
+
     // Computes modified viewport scale and offset including fog info, and stores
     // these to temp memory in the RDP buffer. This is only used during vertex write
     // and the first half of clipping, so that memory is not used then.
     llv     $v23[0], (fogFactor)($zero)           // Load fog multiplier 0 and offset 1
-.if CFG_LEGACY_VTX_PIPE && CFG_NO_OCCLUSION_PLANE
+.if CFG_NO_OCCLUSION_PLANE
     veq     $v29, $v31, $v31[3h] // VCC = 00010001
-.elseif !CFG_NO_OCCLUSION_PLANE
+.else
     vge     $v29, $v31, $v31[2h] // VCC = 00110011
 .endif
     ldv     sVPO[0], (viewport + 8)($zero)        // Load vtrans duplicated in 0-3 and 4-7
-.if CFG_LEGACY_VTX_PIPE && CFG_NO_OCCLUSION_PLANE
+.if CFG_NO_OCCLUSION_PLANE
 // sFGM is $v12 // FoG Mask
     vmrg    sFGM, vOne, $v31[2] // sFGM is 0,0,0,1,0,0,0,1
-.elseif !CFG_NO_OCCLUSION_PLANE
+.else
     vmrg    sOPMs, vOne, $v31[1] // Signs of sOPMs are --++--++
 .endif
     ldv     sVPO[8], (viewport + 8)($zero)
     vne     $v29, $v31, $v31[3h]                  // VCC = 11101110
+    llv     sSTS[0], (textureSettings2)($zero)    // Texture ST scale in 0, 1
     ldv     sVPS[0], (viewport)($zero)            // Load vscale duplicated in 0-3 and 4-7
     ldv     sVPS[8], (viewport)($zero)
-    lqv     $v30, (fxParams - altBase)(altBaseReg) // Parameters for vtx and lighting
 .if !CFG_NO_OCCLUSION_PLANE
     vmudh   sOPMs, sOPMs, $v31[5] // sOPMs is 0xC000, 0xC000, 0x4000, 0x4000, repeat
 .endif
-.if CFG_LEGACY_VTX_PIPE
-    lbu     $7, mITValid
+    llv     $v30[0], (attrOffsetST - altBase)(altBaseReg)  // Texture ST offset in 0, 1
     vmrg    sVPO, sVPO, $v23[1]                   // Put fog offset in elements 3,7 of vtrans
-    llv     sSTS[0], (textureSettings2)($zero)    // Texture ST scale in 0, 1
+    llv     $v30[8], (attrOffsetST - altBase)(altBaseReg)  // Texture ST offset in 4, 5
+    vmov    sSTS[4], sSTS[0]                      // Copy ST scale elem 0 to elem 4
+    andi    $11, $5, G_ATTROFFSET_ST_ENABLE >> 8  // Test ST attribute offset enable bit in geom mode mid bytes
     vmrg    sVPS, sVPS, $v23[0]                   // Put fog multiplier in elements 3,7 of vscale
+    bnez    $11, @@skipoffset                     // Enabled: keep the ST offset loaded above
+     lbu    $7, mITValid                          // Delay slot: load mITValid into $7 on both paths
+    vclr    $v30                                  // Disabled: zero the ST offset
+@@skipoffset:
+.if !CFG_NO_OCCLUSION_PLANE
+    sqv     sOPMs, (tempOccPlusMinus)(rdpCmdBufEndP1) // Store occlusion plane -/+4000 constants
+    sqv     sVPO, (tempViewportOffset)(rdpCmdBufEndP1) // Store viewport offset
+    sqv     sVPS, (tempViewportScale)(rdpCmdBufEndP1) // Store viewport scale
+.endif
+    vmov    sSTS[5], sSTS[1]                      // Copy ST scale elem 1 to elem 5
     bgtz    $ra, clip_after_constants             // Return to clipping if from there
-     llv    sSTS[8], (textureSettings2)($zero)    // Texture ST scale in 4, 5
+     lsv    $v30[6], (perspNorm - altBase)(altBaseReg) // Perspective norm
+
 .else
+
+    // Computes modified viewport scale and offset including fog info, and stores
+    // these to temp memory in the RDP buffer. This is only used during vertex write
+    // and the first half of clipping, so that memory is not used then.
+    llv     $v23[0], (fogFactor)($zero)           // Load fog multiplier 0 and offset 1
+.if !CFG_NO_OCCLUSION_PLANE
+    vge     $v29, $v31, $v31[2h] // VCC = 00110011
+.endif
+    ldv     sVPO[0], (viewport + 8)($zero)        // Load vtrans duplicated in 0-3 and 4-7
+.if !CFG_NO_OCCLUSION_PLANE
+    vmrg    sOPMs, vOne, $v31[1] // Signs of sOPMs are --++--++
+.endif
+    ldv     sVPO[8], (viewport + 8)($zero)
+    vne     $v29, $v31, $v31[3h]                  // VCC = 11101110
+    ldv     sVPS[0], (viewport)($zero)            // Load vscale duplicated in 0-3 and 4-7
+    ldv     sVPS[8], (viewport)($zero)
+    lqv     $v30, (fxParams - altBase)(altBaseReg) // Parameters for vtx and lighting
+.if !CFG_NO_OCCLUSION_PLANE
+    vmudh   sOPMs, sOPMs, $v31[5] // sOPMs is 0xC000, 0xC000, 0x4000, 0x4000, repeat
+.endif
     lw      $10, (geometryModeLabel)($zero)
     vmrg    sVPO, sVPO, $v23[1]                   // Put fog offset in elements 3,7 of vtrans
 .if !CFG_NO_OCCLUSION_PLANE
@@ -2159,6 +2241,7 @@ vtx_setup_constants:
 @@skipzeroao:
     bgtz    $ra, clip_after_constants             // Return to clipping if from there
      sqv    sVPS, (tempViewportScale)(rdpCmdBufEndP1) // Store viewport scale
+
 .endif
 
 vtx_after_setup_constants:
@@ -2167,7 +2250,9 @@ vtx_after_setup_constants:
      li     $16, vtx_loop_no_lighting      // This is clipFlags, but not modified
     li      $16, lt_vtx_pair               // during vtx_store
 @@skip_lighting:
+
 .if CFG_LEGACY_VTX_PIPE
+
     bnez    $7, skip_vtx_mvp
      li     $2, vpMatrix
     li      $3, mMatrix
@@ -2272,14 +2357,17 @@ vtx_after_calc_mit:
 // $v18:$v19 = available, $v20:$v21 = vPairPosI/F/temp,
 // $v22 = vPairST, $v23:$v24 = vPairTPosF/I/temp, $v25:$v26 = temps, $v27 = vPairRGBA,
 // $v28 = vOne, $v29 = garbage, $v30 = params, $v31 = constants
-// $1: 0x10 vtx count, $2: need for clipping, $3: init lt ptr, $4: vtx1/perf,
+// $1: 0x10 vtx count, $2: need for clipping, $3: temp, $4: vtx1/perf,
 // $5: geom mode mid, $6: need for clipping, $7: fog flag, $8: secondVtxPos,
-// $9: need for clipping, $10:$11: temp, $12: perf, $13: altBaseReg, $14: inputVtxPos,
-// $15: outputVtxPos, $16: lt jump addr, $17:$18: need for clipping, $19: shadow out vtx,
-// $20: temp, $21: need for clipping, $22:$23: cmd buf, $24: temp, $25: cmd_w0 global,
+// $9: clipping / curLight, $10:$11: temp, $12: perf, $13: altBaseReg, $14: inputVtxPos,
+// $15: outputVtxPos, $16: clipping / lt jump addr, $17:$18: clipping, $19: shadow out vtx,
+// $20: temp, $21: clipping / first light, $22:$23: cmd buf, $24: temp, $25: cmd_w0 global,
 // $26: taskDataPtr, $27: inputBufferPos, $28:$30: perf, $ra return addr
 
 .align 8
+
+.if CFG_NO_OCCLUSION_PLANE
+
 vtx_loop_no_lighting:
     vmadh   $v29, vM1I, vPairPosI[1h]
     andi    $11, $11, CLIP_SCAL_NPXY // Mask to only bits we care about
@@ -2299,7 +2387,7 @@ vtx_store_for_clip:
     vmudl   $v29, vPairTPosF, $v30[3]       // Persp norm
     sub     $20, secondVtxPos, $7           // Points 8 before secondVtxPos if fog, else 0
 // s1WI is $v16 // vtx_store 1/W Int
-    vmadm   s1WI, vPairTPosI, $v30[3]        // Persp norm
+    vmadm   s1WI, vPairTPosI, $v30[3]       // Persp norm
     addi    outputVtxPos, outputVtxPos, 2*vtxSize // Points to SECOND output vtx
 // s1WF is $v17 // vtx_store 1/W Frac
     vmadn   s1WF, $v31, $v31[2]             // 0
@@ -2338,8 +2426,7 @@ vtx_store_for_clip:
     ldv     sTCL[0],   (VTX_IN_TC + 2 * inputVtxSize)(inputVtxPos) // ST in 0:1, RGBA in 2:3
     vmadh   s1WI, s1WI, sRTI[3h]
     sdv     vPairTPosF[0],  (VTX_FRAC_VEC  )($19)
-// sST2 equ $v11 // vtx_store ST coordinates copy 2
-    vmudm   sST2, vPairST, sSTS       // Scale ST
+    vch     $v29, vPairTPosI, sSCI[3h] // Clip scaled high
     lsv     vPairTPosF[14], (VTX_Z_FRAC    )(secondVtxPos) // load Z into W slot, will be for fog below
     vmudh   $v29, vOne, $v31[4]  // 4
     sdv     vPairTPosI[8],  (VTX_INT_VEC   )(secondVtxPos)
@@ -2347,9 +2434,10 @@ vtx_store_for_clip:
     lsv     vPairTPosF[6],  (VTX_Z_FRAC    )($19) // load Z into W slot, will be for fog below
     vmadh   s1WI, s1WI, $v31[0]  // -4
     sdv     vPairTPosI[0],  (VTX_INT_VEC   )($19)
-    // vnop
+    vmudm   $v29, vPairST, sSTS       // Scale ST
     ldv     sTCL[8],   (VTX_IN_TC + 3 * inputVtxSize)(inputVtxPos) // ST in 4:5, RGBA in 6:7
-    vch     $v29, vPairTPosI, sSCI[3h] // Clip scaled high
+// sST2 equ $v11 // vtx_store ST coordinates copy 2
+    vmadh   sST2, vOne, $v30          // + 1 * ST offset; elems 0, 1, 4, 5
     suv     vPairRGBA[4],   (VTX_COLOR_VEC )(secondVtxPos) // Store RGBA for second vtx
     vmudl   $v29, s1WF, sRTF[2h]
     lsv     vPairTPosI[14], (VTX_Z_INT     )(secondVtxPos) // load Z into W slot, will be for fog below
@@ -2422,6 +2510,246 @@ vtx_epilogue:
     j       vertex_end
      lqv    $v30, (v30Value)($zero)    // Restore value overwritten in vtx_store
 
+
+.else // not CFG_NO_OCCLUSION_PLANE
+// Vector register aliases used by the occlusion plane version of vtx_store:
+// sKPI is $v11 // vtx_store Keep Int (keep across pipelining)
+// sKPG is vBBB = $v21 // vtx_store Keep Fog
+// sCLZ is $v19
+// s1WI is $v16 // vtx_store 1/W Int
+// s1WF is $v17 // vtx_store 1/W Frac
+// sKPF is $v18 // vtx_store Keep Frac
+// sSCF is $v20 // vtx_store Scaled Clipping Frac
+// sSCI is $v21 // vtx_store Scaled Clipping Int
+// sRTF is $v25 // vtx_store Reciprocal Temp Frac
+// sRTI is $v26 // vtx_store Reciprocal Temp Int
+// sTCL is $v19 // vtx_store Temp CoLor
+// sST2 equ $v11 // vtx_store ST coordinates copy 2
+// vPairPosI is $v20
+// Shared registers in the list above: $v11 = sKPI/sST2, $v19 = sCLZ/sTCL, $v20 = sSCF/vPairPosI, $v21 = sKPG/sSCI
+vtx_loop_no_lighting:
+    // NOTE(review): the three labels here mark the same address; only blank lines separate them
+    
+vtx_return_from_lighting:
+vtx_store_for_clip:
+
+    
+    
+    
+    
+    
+    
+    
+     
+    
+    
+    
+    
+
+    vge     $v29, sOC2, sO47     // Each compare to coeffs 4-7
+    slv     sKPF[2],  (VTX_SCR_Z      )($19)
+    vmudn   $v29, vM3F, vOne
+    cfc2    $20, $vcc                   // Results of the vge above
+    vmadh   $v29, vM3I, vOne
+    luv     vPairRGBA[0],    (tempVpRGBA)(rdpCmdBufEndP1) // Vtx pair RGBA
+    vmadn   $v29, vM0F, vPairPosI[0h]
+    andi    $11, $11, CLIP_OCCLUDED | (CLIP_OCCLUDED >> 4) // Only bits 3, 7 from occlusion
+    vmadh   $v29, vM0I, vPairPosI[0h]
+    or      $20, $20, $11    // Combine occlusion results. Any set in 0-3, 4-7 = not occluded
+    vmadn   $v29, vM1F, vPairPosI[1h]
+    andi    $11, $20, 0x00F0 // Bits 4-7 for vtx 2
+    vmadh   $v29, vM1I, vPairPosI[1h]
+    bnez    $11, @@skipv2    // If nonzero, at least one equation false, don't set occluded flag
+     addi   $1, $1, -2*inputVtxSize         // Decrement vertex count by 2
+    ori     $24, $24, CLIP_OCCLUDED // All equations true, set vtx 2 occluded flag
+@@skipv2:
+    vmadn   vPairTPosF, vM2F, vPairPosI[2h]
+    andi    $20, $20, 0x000F // Bits 0-3 for vtx 1
+    vmadh   vPairTPosI, vM2I, vPairPosI[2h]
+    bnez    $20, @@skipv1    // If nonzero, at least one equation false, don't set occluded flag
+     sh     $24,            (VTX_CLIP      )(secondVtxPos) // Store second vertex clip flags
+    ori     $10, $10, CLIP_OCCLUDED // All equations true, set vtx 1 occluded flag
+@@skipv1:
+    vmudl   $v29, vPairTPosF, $v30[3]       // Persp norm
+    sh      $10,            (VTX_CLIP      )($19) // Store first vertex flags
+    vmadm   s1WI, vPairTPosI, $v30[3]       // Persp norm
+    
+    ldv     sOCM[0], (occlusionPlaneMidCoeffs - altBase)(altBaseReg) // Occlusion plane mid coeffs in elems 0-3
+    ldv     sOCM[8], (occlusionPlaneMidCoeffs - altBase)(altBaseReg) // and again in elems 4-7
+    addi    outputVtxPos, outputVtxPos, 2*vtxSize // Points to SECOND output vtx
+    
+    vmadn   s1WF, $v31, $v31[2]             // 0
+    vmudn   sSCF, vPairTPosF, $v31[3]       // W * clip ratio for scaled clipping
+    vmadh   sSCI, vPairTPosI, $v31[3]       // W * clip ratio for scaled clipping
+    vrcph   $v29[0], s1WI[3]                // Reciprocal of s1WI:s1WF elems 3 and 7; results go to
+    vrcpl   sRTF[2], s1WF[3]                // sRTI elems 3, 7 and sRTF elems 2, 6
+    vrcph   sRTI[3], s1WI[7]
+    vrcpl   sRTF[6], s1WF[7]
+    vrcph   sRTI[7], $v31[2] // 0
+    vmudn   $v29, vPairTPosF, sOCM          // X * kx, Y * ky, Z * kz
+    vmadh   $v29, vPairTPosI, sOCM          // Int * int
+    vreadacc sOC1, ACC_UPPER                // Load int * int portion
+    vch     $v29, vPairTPosI, vPairTPosI[3h] // Clip screen high
+    vcl     $v29, vPairTPosF, vPairTPosF[3h] // Clip screen low
+    vmudl   $v29, s1WF, sRTF[2h]
+    cfc2    $10, $vcc                   // Screen clip results
+    vmadm   $v29, s1WI, sRTF[2h]
+    sdv     vPairTPosF[8],  (VTX_FRAC_VEC  )(secondVtxPos)
+    vmadn   s1WF, s1WF, sRTI[3h]
+    ldv     sTCL[0],   (VTX_IN_TC + 2 * inputVtxSize)(inputVtxPos) // ST in 0:1, RGBA in 2:3
+    vmadh   s1WI, s1WI, sRTI[3h]
+    sdv     vPairTPosF[0],  (VTX_FRAC_VEC  )($19)
+    veq     $v29, $v31, $v31[3h] // Set VCC to 00010001
+    lsv     vPairTPosF[14], (VTX_Z_FRAC    )(secondVtxPos) // load Z into W slot, will be for fog below
+    vmudh   $v29, vOne, $v31[4]  // 4
+    sdv     vPairTPosI[8],  (VTX_INT_VEC   )(secondVtxPos)
+    vmadn   s1WF, s1WF, $v31[0]  // -4
+    lsv     vPairTPosF[6],  (VTX_Z_FRAC    )($19) // load Z into W slot, will be for fog below
+    vmadh   s1WI, s1WI, $v31[0]  // -4
+    sdv     vPairTPosI[0],  (VTX_INT_VEC   )($19)
+    vmrg    sOC1, sOCM, sOC1  // Put constant factor in elems 3, 7
+    ldv     sTCL[8],   (VTX_IN_TC + 3 * inputVtxSize)(inputVtxPos) // ST in 4:5, RGBA in 6:7
+    vch     $v29, vPairTPosI, sSCI[3h] // Clip scaled high
+    suv     vPairRGBA[4],   (VTX_COLOR_VEC )(secondVtxPos) // Store RGBA for second vtx
+    vmudl   $v29, s1WF, sRTF[2h]
+    lsv     vPairTPosI[14], (VTX_Z_INT     )(secondVtxPos) // load Z into W slot, will be for fog below
+    vmadm   $v29, s1WI, sRTF[2h]
+    suv     vPairRGBA[0],   (VTX_COLOR_VEC )($19) // Store RGBA for first vtx
+    vmadn   s1WF, s1WF, sRTI[3h]
+    lsv     vPairTPosI[6],  (VTX_Z_INT     )($19) // load Z into W slot, will be for fog below
+    vmadh   s1WI, s1WI, sRTI[3h]
+    srl     $24, $10, 4            // Shift second vertex screen clipping to first slots
+    vcl     $v29, vPairTPosF, sSCF[3h] // Clip scaled low
+    andi    $24, $24, CLIP_SCRN_NPXY | CLIP_CAMPLANE // Mask to only screen bits we care about
+    vadd    sOC1, sOC1, sOC1[0q] // Add pairs upwards
+    cfc2    $20, $vcc                   // Scaled clip results
+    vmudl   $v29, vPairTPosF, s1WF[3h] // Pos times inv W
+    vmadm   $v29, vPairTPosI, s1WF[3h] // Pos times inv W
+    vmadn   vPairTPosF, vPairTPosF, s1WI[3h]
+    vmadh   vPairTPosI, vPairTPosI, s1WI[3h] // vPairTPosI:vPairTPosF = pos times inv W
+    vadd    sOC1, sOC1, sOC1[1h] // Add elems 1, 5 to 3, 7
+    // vnop
+    vmudl   $v29, vPairTPosF, $v30[3] // Persp norm
+    vmadm   vPairTPosI, vPairTPosI, $v30[3] // Persp norm
+    vmadn   vPairTPosF, $v31, $v31[2] // 0; Now vPairTPosI:vPairTPosF = projected position
+    // vnop
+    vlt     $v29, sOC1, $v31[2] // Occlusion plane equation < 0 in elems 3, 7
+    vmudh   $v29, sVPO, vOne // offset * 1
+    cfc2    $11, $vcc // Load occlusion plane mid results to bits 3 and 7
+    vmadn   sKPF, vPairTPosF, sVPS   // + pos frac * scale
+    vmadh   sKPI, vPairTPosI, sVPS   // int part, sKPI:sKPF is now screen space pos
+    vmadh   sFOG, vOne, $v31[6] // + 0x7F00 in all elements, clamp to 0x7FFF for fog
+    vmudm   $v29, vPairST, sSTS       // Scale ST
+    ldv     sO03[0], (occlusionPlaneEdgeCoeffs - altBase)(altBaseReg) // Load coeffs 0-3
+    vmadh   sST2, vOne, $v30          // + 1 * ST offset; elems 0, 1, 4, 5
+    ssv     sKPF[12], (VTX_SCR_Z_FRAC )(secondVtxPos)
+    vmudh   sOSC, sKPI, $v31[4]  // 4; scale up x and y
+    ldv     sO03[8], (occlusionPlaneEdgeCoeffs - altBase)(altBaseReg) // and for vtx 2
+    vge     sCLZ, sKPI, $v31[2]  // 0; clamp Z to >= 0
+    ldv     sO47[0], (occlusionPlaneEdgeCoeffs + 8 - altBase)(altBaseReg) // Load coeffs 4-7
+    vge     sFOG, sFOG, $v31[6]  // 0x7F00; clamp fog to >= 0 (want low byte only)
+    ldv     sO47[8], (occlusionPlaneEdgeCoeffs + 8 - altBase)(altBaseReg) // and for vtx 2
+    vmulf   $v29, sOPM, sKPI[1h] // -0x4000*Y1, --, +0x4000*Y1, --, repeat vtx 2
+    sub     $20, secondVtxPos, $7 // Points 8 before secondVtxPos if fog, else 0
+    vmacf   sOC2, sO03, sOSC[0h]  //    4*X1*c0, --,    4*X1*c2, --, repeat vtx 2
+    sub     $3, $19, $7           // Points 8 before $19 if fog, else 0
+    vmulf   $v29, sOPM, sKPI[0h]  // --, -0x4000*X1, --, +0x4000*X1, repeat vtx 2
+    ssv     sCLZ[12], (VTX_SCR_Z      )(secondVtxPos)
+    vmacf   sOC3, sO03, sOSC[1h]  // --,    4*Y1*c1, --,    4*Y1*c3, repeat vtx 2
+    sbv     sFOG[15], (VTX_COLOR_A + 8)($20) // In VTX_SCR_Y if fog disabled...
+    vmov    sKPF[1], sCLZ[2]
+    slv     sKPI[8],  (VTX_SCR_VEC    )(secondVtxPos)
+    // vnop
+    sbv     sFOG[7],  (VTX_COLOR_A + 8)($3) // ...which gets overwritten below
+    veq     $v29, $v31, $v31[0q] // Set VCC to 10101010
+    blez    $1, vtx_epilogue
+     vmrg   sOC2, sOC2, sOC3     // Elems 0-3 are results for vtx 1, 4-7 for vtx 2
+    // vnop
+    jr      $16                    // lt_vtx_pair or vtx_loop_no_lighting
+     slv    sKPI[0],  (VTX_SCR_VEC    )($19)
+
+
+    // NOTE(review): nothing from here to vtx_epilogue is reachable as written: it follows the unconditional jr above and has no label.
+    
+    
+    
+    
+    
+
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    sra     $24, $1, 31        // All 1s if on last iter
+    andi    $24, $24, vtxSize  // vtxSize if on last iter, else normally 0
+    sub     secondVtxPos, outputVtxPos, $24 // First output vtx on last iter, else second
+    addi    $19, outputVtxPos, -vtxSize  // First output vtx always
+    
+    
+    ssv     s1WF[14],          (VTX_INV_W_FRAC)(secondVtxPos)
+    ldv     vPairPosI[0], (VTX_IN_OB + 2 * inputVtxSize)(inputVtxPos) // Pos of 1st vector for next iteration
+    ldv     vPairPosI[8], (VTX_IN_OB + 3 * inputVtxSize)(inputVtxPos) // Pos of 2nd vector on next iteration
+    addi    inputVtxPos, inputVtxPos, (2 * inputVtxSize) // Advance two positions forward in the input vertices
+    andi    $10, $10, CLIP_SCRN_NPXY | CLIP_CAMPLANE // Mask to only screen bits we care about
+    sll     $11, $20, 4            // Shift first vertex scaled clipping to second slots
+    ssv     s1WF[6],           (VTX_INV_W_FRAC)($19)
+    ssv     s1WI[14],          (VTX_INV_W_INT )(secondVtxPos)
+    ssv     s1WI[6],           (VTX_INV_W_INT )($19)
+    slv     sST2[8],           (VTX_TC_VEC    )(secondVtxPos) // Store scaled S, T vertex 2
+    slv     sST2[0],           (VTX_TC_VEC    )($19) // Store scaled S, T vertex 1
+    andi    $20, $20, CLIP_SCAL_NPXY // Mask to only bits we care about
+    or      $24, $24, $20            // Combine results for second vertex
+    
+    
+    sdv     sTCL[8],      (tempVpRGBA)(rdpCmdBufEndP1) // Vtx 0 and 1 RGBA in order
+    
+    
+    andi    $11, $11, CLIP_SCAL_NPXY // Mask to only bits we care about
+    or      $10, $10, $11          // Combine results for first vertex
+    
+
+    
+    
+    lqv     sOPM, (tempOccPlusMinus)(rdpCmdBufEndP1) // Load occlusion plane -/+4000 constants
+    
+
+// Epilogue: the vertex loop branches here (blez $1) once the remaining vertex count is <= 0.
+vtx_epilogue:
+    vge     sKPG, sKPI, $v31[6]  // Clamp W/fog to >= 0x7F00 (low byte is used)
+    andi    $11, $11, CLIP_SCAL_NPXY // Mask to only bits we care about
+    vge     sCLZ, sKPI, $v31[2]              // 0; clamp Z to >= 0
+    or      $10, $10, $11          // Combine results for first vertex
+    beqz    $7, @@skip_fog         // $7 is the fog flag; zero skips the fog alpha stores
+     slv    sKPI[8],  (VTX_SCR_VEC    )(secondVtxPos) // Delay slot: screen pos of second vtx, stored either way
+    sbv     sKPG[15], (VTX_COLOR_A    )(secondVtxPos) // Fog byte into alpha of second vtx
+    sbv     sKPG[7],  (VTX_COLOR_A    )($19)          // Fog byte into alpha of first vtx
+@@skip_fog:
+    vmov    sKPF[1], sCLZ[2]       // Clamped Z of first vtx into sKPF elem 1 for the slv below
+    ssv     sCLZ[12], (VTX_SCR_Z      )(secondVtxPos) // Clamped Z of second vtx
+    slv     sKPI[0],  (VTX_SCR_VEC    )($19)          // Screen pos of first vtx
+    ssv     sKPF[12], (VTX_SCR_Z_FRAC )(secondVtxPos)
+    bltz    $ra, clip_after_vtx_store  // $ra - from clipping or + from while_wait_dma_busy
+     slv    sKPF[2],  (VTX_SCR_Z      )($19)          // Delay slot: sKPF elems 1-2 to first vtx
+    sh      $10,      (VTX_CLIP       )($19) // Store first vertex flags
+    j       vertex_end
+     lqv    $v30, (v30Value)($zero)    // Restore value overwritten in vtx_store
+
+
+
+.endif
+
 .else // end of new LVP_NOC
 
 .if CFG_LEGACY_VTX_PIPE
@@ -3189,7 +3517,7 @@ lt_vtx_pair:
 .endif
     j       lt_continue_setup
 .if CFG_LEGACY_VTX_PIPE
-     lbu    $3, numLightsxSize
+     lbu    $21, numLightsxSize
 .else
      andi   $11, $5, G_PACKED_NORMALS >> 8
 .endif
@@ -3220,9 +3548,10 @@ lt_continue_setup:
 //
 // LVP lighting setup
 //
+    llv     $v30[12], (aoAmbientFactor - altBase)(altBaseReg) // Ambient and dir to elems 6, 7
     lb      $11, dirLightsXfrmValid
     li      $10, -1                   // To mark lights valid
-    addi    $3, $3, altBase           // Point to ambient light; stored through vtx proc
+    addi    $21, $21, altBase         // Point to ambient light; stored through vtx proc
     andi    $17, $5, G_TEXTURE_GEN >> 8 // This is clipPolyRead, but not touched in vtx_store
     and     $11, $11, $7              // Zero if either matrix or lights invalid
     bnez    $11, lt_setup_after_xfrm
@@ -3298,7 +3627,7 @@ xfrm_light_loop_2:
     vmudh   $v29, $v4, $v4       // Squared
     sub     $10, curLight, altBaseReg // Is curLight (write ptr) <= 0?
     vreadacc $v7, ACC_MIDDLE     // Read not-clamped value
-    sub     $11, curLight, $3    // Is curLight (write ptr) <, =, or > ambient light?
+    sub     $11, curLight, $21   // Is curLight (write ptr) <, =, or > ambient light?
     vreadacc $v6, ACC_UPPER
     sw      $20,    (tempXfrmSingle)(rdpCmdBufEndP1) // Store light 0
     vmudm   $v29, $v19, $v23[0h] // Vec int * frac scaling
@@ -3327,7 +3656,7 @@ xfrm_light_loop_2:
 lt_setup_after_xfrm:
     // Load first light direction to $v13, which is not used throughout vtx processing.
     j       vtx_after_lt_setup
-     lpv    $v13[0], (ltBufOfs + 8 - lightSize)($3) // Xfrmed dir in elems 4-6
+     lpv    $v13[0], (ltBufOfs + 8 - lightSize)($21) // Xfrmed dir in elems 4-6
     
 xfrm_light_store_lookat:
     vmadh   $v29, $v1,  $v3[1h]
@@ -3358,9 +3687,9 @@ lt_vtx_pair:
     andi    $11, $11, CLIP_SCAL_NPXY // Mask to only bits we care about
 .endif
     vmulf   $v29, vPairNrml, $v13[4] // Normals X elems 0, 4 * first light dir
-    luv     vPairLt,     (ltBufOfs + 0)($3)  // Total light level, init to ambient
+    luv     vPairLt,     (ltBufOfs + 0)($21)  // Total light level, init to ambient
     vmacf   $v29, $v14, $v13[5] // Normals Y elems 0, 4 * first light dir
-    lpv     vDDD[0],     (ltBufOfs + 8 - 2*lightSize)($3) // Xfrmed dir in elems 4-6
+    lpv     vDDD[0],     (ltBufOfs + 8 - 2*lightSize)($21) // Xfrmed dir in elems 4-6
     vmacf   vAAA, $v15, $v13[6] // Normals Z elems 0, 4 * first light dir
 .if CFG_NO_OCCLUSION_PLANE // New LVP_NOC
     or      $10, $10, $11          // Combine results for first vertex
@@ -3369,14 +3698,14 @@ lt_vtx_pair:
     // nop
     // vnop
 .endif
-    beq     $3, altBaseReg, lt_post
+    beq     $21, altBaseReg, lt_post
 .if CFG_NO_OCCLUSION_PLANE // New LVP_NOC
      addi   $1, $1, -2*inputVtxSize         // Decrement vertex count by 2
 .else
      lpv    ltLookAt[0], (xfrmLookatDirs + 0)($zero) // Lookat 0 in 0-2, 1 in 4-6; = vNrmOut
 .endif
     // vnop
-    move    curLight, $3                     // Point to ambient light
+    move    curLight, $21                   // Point to ambient light
 lt_loop:
     vge     vCCC, vAAA, $v31[2] // 0; clamp dot product to >= 0
     vmulf   $v29, vPairNrml, vDDD[4] // Normals X elems 0, 4