; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %}

; Signed 128-bit remainder with a variable divisor.
;
; The expected PTX below has no call: the operation is expanded inline.
;   * Both operands are replaced by their absolute values (negate + selp on
;     the sign of the high word).
;   * The quotient is forced to 0 when either magnitude is zero or when the
;     leading-zero-count difference exceeds 127; otherwise it is produced by
;     the shift-and-subtract loop in the udiv-do-while block.
;   * In udiv-end the remainder is formed as |lhs| - quotient * |rhs| and then
;     conditionally negated using the sign mask of the dividend's high word.
;
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: srem_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<22>;
; CHECK-NEXT:    .reg .b32 %r<12>;
; CHECK-NEXT:    .reg .b64 %rd<126>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.b64 {%rd45, %rd46}, [srem_i128_param_0];
; CHECK-NEXT:    ld.param.v2.b64 {%rd49, %rd50}, [srem_i128_param_1];
; CHECK-NEXT:    shr.s64 %rd2, %rd46, 63;
; CHECK-NEXT:    sub.cc.s64 %rd51, 0, %rd45;
; CHECK-NEXT:    subc.cc.s64 %rd52, 0, %rd46;
; CHECK-NEXT:    setp.lt.s64 %p1, %rd46, 0;
; CHECK-NEXT:    selp.b64 %rd4, %rd52, %rd46, %p1;
; CHECK-NEXT:    selp.b64 %rd3, %rd51, %rd45, %p1;
; CHECK-NEXT:    sub.cc.s64 %rd53, 0, %rd49;
; CHECK-NEXT:    subc.cc.s64 %rd54, 0, %rd50;
; CHECK-NEXT:    setp.lt.s64 %p2, %rd50, 0;
; CHECK-NEXT:    selp.b64 %rd6, %rd54, %rd50, %p2;
; CHECK-NEXT:    selp.b64 %rd5, %rd53, %rd49, %p2;
; CHECK-NEXT:    or.b64 %rd55, %rd5, %rd6;
; CHECK-NEXT:    setp.eq.b64 %p3, %rd55, 0;
; CHECK-NEXT:    or.b64 %rd56, %rd3, %rd4;
; CHECK-NEXT:    setp.eq.b64 %p4, %rd56, 0;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    setp.ne.b64 %p6, %rd6, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd6;
; CHECK-NEXT:    cvt.u64.u32 %rd57, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd5;
; CHECK-NEXT:    cvt.u64.u32 %rd58, %r2;
; CHECK-NEXT:    add.s64 %rd59, %rd58, 64;
; CHECK-NEXT:    selp.b64 %rd60, %rd57, %rd59, %p6;
; CHECK-NEXT:    setp.ne.b64 %p7, %rd4, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd4;
; CHECK-NEXT:    cvt.u64.u32 %rd61, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd3;
; CHECK-NEXT:    cvt.u64.u32 %rd62, %r4;
; CHECK-NEXT:    add.s64 %rd63, %rd62, 64;
; CHECK-NEXT:    selp.b64 %rd64, %rd61, %rd63, %p7;
; CHECK-NEXT:    mov.b64 %rd116, 0;
; CHECK-NEXT:    sub.cc.s64 %rd66, %rd60, %rd64;
; CHECK-NEXT:    subc.cc.s64 %rd8, %rd116, 0;
; CHECK-NEXT:    setp.ne.b64 %p8, %rd8, 0;
; CHECK-NEXT:    and.pred %p10, %p8, %p8;
; CHECK-NEXT:    setp.eq.b64 %p11, %rd8, 0;
; CHECK-NEXT:    setp.gt.u64 %p12, %rd66, 127;
; CHECK-NEXT:    and.pred %p13, %p11, %p12;
; CHECK-NEXT:    or.pred %p14, %p13, %p10;
; CHECK-NEXT:    or.pred %p15, %p5, %p14;
; CHECK-NEXT:    xor.b64 %rd67, %rd66, 127;
; CHECK-NEXT:    or.b64 %rd68, %rd67, %rd8;
; CHECK-NEXT:    setp.eq.b64 %p16, %rd68, 0;
; CHECK-NEXT:    selp.b64 %rd125, 0, %rd4, %p15;
; CHECK-NEXT:    selp.b64 %rd124, 0, %rd3, %p15;
; CHECK-NEXT:    or.pred %p17, %p15, %p16;
; CHECK-NEXT:    @%p17 bra $L__BB0_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd118, %rd66, 1;
; CHECK-NEXT:    addc.cc.s64 %rd119, %rd8, 0;
; CHECK-NEXT:    or.b64 %rd71, %rd118, %rd119;
; CHECK-NEXT:    setp.eq.b64 %p18, %rd71, 0;
; CHECK-NEXT:    cvt.u32.u64 %r5, %rd66;
; CHECK-NEXT:    sub.s32 %r6, 127, %r5;
; CHECK-NEXT:    shl.b64 %rd72, %rd4, %r6;
; CHECK-NEXT:    sub.s32 %r7, 64, %r6;
; CHECK-NEXT:    shr.u64 %rd73, %rd3, %r7;
; CHECK-NEXT:    or.b64 %rd74, %rd72, %rd73;
; CHECK-NEXT:    sub.s32 %r8, 63, %r5;
; CHECK-NEXT:    shl.b64 %rd75, %rd3, %r8;
; CHECK-NEXT:    setp.gt.s32 %p19, %r6, 63;
; CHECK-NEXT:    selp.b64 %rd123, %rd75, %rd74, %p19;
; CHECK-NEXT:    shl.b64 %rd122, %rd3, %r6;
; CHECK-NEXT:    mov.b64 %rd113, %rd116;
; CHECK-NEXT:    @%p18 bra $L__BB0_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd118;
; CHECK-NEXT:    shr.u64 %rd78, %rd3, %r9;
; CHECK-NEXT:    sub.s32 %r10, 64, %r9;
; CHECK-NEXT:    shl.b64 %rd79, %rd4, %r10;
; CHECK-NEXT:    or.b64 %rd80, %rd78, %rd79;
; CHECK-NEXT:    add.s32 %r11, %r9, -64;
; CHECK-NEXT:    shr.u64 %rd81, %rd4, %r11;
; CHECK-NEXT:    setp.gt.s32 %p20, %r9, 63;
; CHECK-NEXT:    selp.b64 %rd120, %rd81, %rd80, %p20;
; CHECK-NEXT:    shr.u64 %rd121, %rd4, %r9;
; CHECK-NEXT:    add.cc.s64 %rd35, %rd5, -1;
; CHECK-NEXT:    addc.cc.s64 %rd36, %rd6, -1;
; CHECK-NEXT:    mov.b64 %rd113, 0;
; CHECK-NEXT:    mov.b64 %rd116, %rd113;
; CHECK-NEXT:  $L__BB0_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd82, %rd120, 63;
; CHECK-NEXT:    shl.b64 %rd83, %rd121, 1;
; CHECK-NEXT:    or.b64 %rd84, %rd83, %rd82;
; CHECK-NEXT:    shl.b64 %rd85, %rd120, 1;
; CHECK-NEXT:    shr.u64 %rd86, %rd123, 63;
; CHECK-NEXT:    or.b64 %rd87, %rd85, %rd86;
; CHECK-NEXT:    shr.u64 %rd88, %rd122, 63;
; CHECK-NEXT:    shl.b64 %rd89, %rd123, 1;
; CHECK-NEXT:    or.b64 %rd90, %rd89, %rd88;
; CHECK-NEXT:    shl.b64 %rd91, %rd122, 1;
; CHECK-NEXT:    or.b64 %rd122, %rd116, %rd91;
; CHECK-NEXT:    or.b64 %rd123, %rd113, %rd90;
; CHECK-NEXT:    sub.cc.s64 %rd92, %rd35, %rd87;
; CHECK-NEXT:    subc.cc.s64 %rd93, %rd36, %rd84;
; CHECK-NEXT:    shr.s64 %rd94, %rd93, 63;
; CHECK-NEXT:    and.b64 %rd116, %rd94, 1;
; CHECK-NEXT:    and.b64 %rd95, %rd94, %rd5;
; CHECK-NEXT:    and.b64 %rd96, %rd94, %rd6;
; CHECK-NEXT:    sub.cc.s64 %rd120, %rd87, %rd95;
; CHECK-NEXT:    subc.cc.s64 %rd121, %rd84, %rd96;
; CHECK-NEXT:    add.cc.s64 %rd118, %rd118, -1;
; CHECK-NEXT:    addc.cc.s64 %rd119, %rd119, -1;
; CHECK-NEXT:    or.b64 %rd97, %rd118, %rd119;
; CHECK-NEXT:    setp.eq.b64 %p21, %rd97, 0;
; CHECK-NEXT:    @%p21 bra $L__BB0_4;
; CHECK-NEXT:    bra.uni $L__BB0_2;
; CHECK-NEXT:  $L__BB0_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd98, %rd122, 63;
; CHECK-NEXT:    shl.b64 %rd99, %rd123, 1;
; CHECK-NEXT:    or.b64 %rd100, %rd99, %rd98;
; CHECK-NEXT:    shl.b64 %rd101, %rd122, 1;
; CHECK-NEXT:    or.b64 %rd124, %rd116, %rd101;
; CHECK-NEXT:    or.b64 %rd125, %rd113, %rd100;
; CHECK-NEXT:  $L__BB0_5: // %udiv-end
; CHECK-NEXT:    mul.hi.u64 %rd102, %rd5, %rd124;
; CHECK-NEXT:    mad.lo.s64 %rd103, %rd5, %rd125, %rd102;
; CHECK-NEXT:    mad.lo.s64 %rd104, %rd6, %rd124, %rd103;
; CHECK-NEXT:    mul.lo.s64 %rd105, %rd5, %rd124;
; CHECK-NEXT:    sub.cc.s64 %rd106, %rd3, %rd105;
; CHECK-NEXT:    subc.cc.s64 %rd107, %rd4, %rd104;
; CHECK-NEXT:    xor.b64 %rd108, %rd106, %rd2;
; CHECK-NEXT:    xor.b64 %rd109, %rd107, %rd2;
; CHECK-NEXT:    sub.cc.s64 %rd110, %rd108, %rd2;
; CHECK-NEXT:    subc.cc.s64 %rd111, %rd109, %rd2;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd110, %rd111};
; CHECK-NEXT:    ret;
  %rem = srem i128 %lhs, %rhs
  ret i128 %rem
}

; Unsigned 128-bit remainder with a variable divisor.
;
; The expected PTX below has no call: the operation is expanded inline.
;   * The quotient is forced to 0 when either operand is zero or when the
;     leading-zero-count difference exceeds 127; otherwise it is produced by
;     the shift-and-subtract loop in the udiv-do-while block.
;   * In udiv-end the remainder is formed as lhs - quotient * rhs and stored
;     directly (no sign fix-up, unlike the signed variant).
;
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: urem_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<18>;
; CHECK-NEXT:    .reg .b32 %r<12>;
; CHECK-NEXT:    .reg .b64 %rd<111>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.b64 {%rd41, %rd42}, [urem_i128_param_0];
; CHECK-NEXT:    ld.param.v2.b64 {%rd3, %rd4}, [urem_i128_param_1];
; CHECK-NEXT:    or.b64 %rd45, %rd3, %rd4;
; CHECK-NEXT:    setp.eq.b64 %p1, %rd45, 0;
; CHECK-NEXT:    or.b64 %rd46, %rd41, %rd42;
; CHECK-NEXT:    setp.eq.b64 %p2, %rd46, 0;
; CHECK-NEXT:    or.pred %p3, %p1, %p2;
; CHECK-NEXT:    setp.ne.b64 %p4, %rd4, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd4;
; CHECK-NEXT:    cvt.u64.u32 %rd47, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd3;
; CHECK-NEXT:    cvt.u64.u32 %rd48, %r2;
; CHECK-NEXT:    add.s64 %rd49, %rd48, 64;
; CHECK-NEXT:    selp.b64 %rd50, %rd47, %rd49, %p4;
; CHECK-NEXT:    setp.ne.b64 %p5, %rd42, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd42;
; CHECK-NEXT:    cvt.u64.u32 %rd51, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd41;
; CHECK-NEXT:    cvt.u64.u32 %rd52, %r4;
; CHECK-NEXT:    add.s64 %rd53, %rd52, 64;
; CHECK-NEXT:    selp.b64 %rd54, %rd51, %rd53, %p5;
; CHECK-NEXT:    mov.b64 %rd101, 0;
; CHECK-NEXT:    sub.cc.s64 %rd5, %rd50, %rd54;
; CHECK-NEXT:    subc.cc.s64 %rd6, %rd101, 0;
; CHECK-NEXT:    setp.gt.u64 %p6, %rd5, 127;
; CHECK-NEXT:    setp.eq.b64 %p7, %rd6, 0;
; CHECK-NEXT:    and.pred %p8, %p7, %p6;
; CHECK-NEXT:    setp.ne.b64 %p9, %rd6, 0;
; CHECK-NEXT:    or.pred %p10, %p8, %p9;
; CHECK-NEXT:    or.pred %p11, %p3, %p10;
; CHECK-NEXT:    xor.b64 %rd56, %rd5, 127;
; CHECK-NEXT:    or.b64 %rd57, %rd56, %rd6;
; CHECK-NEXT:    setp.eq.b64 %p12, %rd57, 0;
; CHECK-NEXT:    selp.b64 %rd110, 0, %rd42, %p11;
; CHECK-NEXT:    selp.b64 %rd109, 0, %rd41, %p11;
; CHECK-NEXT:    or.pred %p13, %p11, %p12;
; CHECK-NEXT:    @%p13 bra $L__BB1_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd103, %rd5, 1;
; CHECK-NEXT:    addc.cc.s64 %rd104, %rd6, 0;
; CHECK-NEXT:    or.b64 %rd60, %rd103, %rd104;
; CHECK-NEXT:    setp.eq.b64 %p14, %rd60, 0;
; CHECK-NEXT:    cvt.u32.u64 %r5, %rd5;
; CHECK-NEXT:    sub.s32 %r6, 127, %r5;
; CHECK-NEXT:    shl.b64 %rd61, %rd42, %r6;
; CHECK-NEXT:    sub.s32 %r7, 64, %r6;
; CHECK-NEXT:    shr.u64 %rd62, %rd41, %r7;
; CHECK-NEXT:    or.b64 %rd63, %rd61, %rd62;
; CHECK-NEXT:    sub.s32 %r8, 63, %r5;
; CHECK-NEXT:    shl.b64 %rd64, %rd41, %r8;
; CHECK-NEXT:    setp.gt.s32 %p15, %r6, 63;
; CHECK-NEXT:    selp.b64 %rd108, %rd64, %rd63, %p15;
; CHECK-NEXT:    shl.b64 %rd107, %rd41, %r6;
; CHECK-NEXT:    mov.b64 %rd98, %rd101;
; CHECK-NEXT:    @%p14 bra $L__BB1_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd103;
; CHECK-NEXT:    shr.u64 %rd67, %rd41, %r9;
; CHECK-NEXT:    sub.s32 %r10, 64, %r9;
; CHECK-NEXT:    shl.b64 %rd68, %rd42, %r10;
; CHECK-NEXT:    or.b64 %rd69, %rd67, %rd68;
; CHECK-NEXT:    add.s32 %r11, %r9, -64;
; CHECK-NEXT:    shr.u64 %rd70, %rd42, %r11;
; CHECK-NEXT:    setp.gt.s32 %p16, %r9, 63;
; CHECK-NEXT:    selp.b64 %rd105, %rd70, %rd69, %p16;
; CHECK-NEXT:    shr.u64 %rd106, %rd42, %r9;
; CHECK-NEXT:    add.cc.s64 %rd33, %rd3, -1;
; CHECK-NEXT:    addc.cc.s64 %rd34, %rd4, -1;
; CHECK-NEXT:    mov.b64 %rd98, 0;
; CHECK-NEXT:    mov.b64 %rd101, %rd98;
; CHECK-NEXT:  $L__BB1_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd71, %rd105, 63;
; CHECK-NEXT:    shl.b64 %rd72, %rd106, 1;
; CHECK-NEXT:    or.b64 %rd73, %rd72, %rd71;
; CHECK-NEXT:    shl.b64 %rd74, %rd105, 1;
; CHECK-NEXT:    shr.u64 %rd75, %rd108, 63;
; CHECK-NEXT:    or.b64 %rd76, %rd74, %rd75;
; CHECK-NEXT:    shr.u64 %rd77, %rd107, 63;
; CHECK-NEXT:    shl.b64 %rd78, %rd108, 1;
; CHECK-NEXT:    or.b64 %rd79, %rd78, %rd77;
; CHECK-NEXT:    shl.b64 %rd80, %rd107, 1;
; CHECK-NEXT:    or.b64 %rd107, %rd101, %rd80;
; CHECK-NEXT:    or.b64 %rd108, %rd98, %rd79;
; CHECK-NEXT:    sub.cc.s64 %rd81, %rd33, %rd76;
; CHECK-NEXT:    subc.cc.s64 %rd82, %rd34, %rd73;
; CHECK-NEXT:    shr.s64 %rd83, %rd82, 63;
; CHECK-NEXT:    and.b64 %rd101, %rd83, 1;
; CHECK-NEXT:    and.b64 %rd84, %rd83, %rd3;
; CHECK-NEXT:    and.b64 %rd85, %rd83, %rd4;
; CHECK-NEXT:    sub.cc.s64 %rd105, %rd76, %rd84;
; CHECK-NEXT:    subc.cc.s64 %rd106, %rd73, %rd85;
; CHECK-NEXT:    add.cc.s64 %rd103, %rd103, -1;
; CHECK-NEXT:    addc.cc.s64 %rd104, %rd104, -1;
; CHECK-NEXT:    or.b64 %rd86, %rd103, %rd104;
; CHECK-NEXT:    setp.eq.b64 %p17, %rd86, 0;
; CHECK-NEXT:    @%p17 bra $L__BB1_4;
; CHECK-NEXT:    bra.uni $L__BB1_2;
; CHECK-NEXT:  $L__BB1_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd87, %rd107, 63;
; CHECK-NEXT:    shl.b64 %rd88, %rd108, 1;
; CHECK-NEXT:    or.b64 %rd89, %rd88, %rd87;
; CHECK-NEXT:    shl.b64 %rd90, %rd107, 1;
; CHECK-NEXT:    or.b64 %rd109, %rd101, %rd90;
; CHECK-NEXT:    or.b64 %rd110, %rd98, %rd89;
; CHECK-NEXT:  $L__BB1_5: // %udiv-end
; CHECK-NEXT:    mul.hi.u64 %rd91, %rd3, %rd109;
; CHECK-NEXT:    mad.lo.s64 %rd92, %rd3, %rd110, %rd91;
; CHECK-NEXT:    mad.lo.s64 %rd93, %rd4, %rd109, %rd92;
; CHECK-NEXT:    mul.lo.s64 %rd94, %rd3, %rd109;
; CHECK-NEXT:    sub.cc.s64 %rd95, %rd41, %rd94;
; CHECK-NEXT:    subc.cc.s64 %rd96, %rd42, %rd93;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd95, %rd96};
; CHECK-NEXT:    ret;
  %rem = urem i128 %lhs, %rhs
  ret i128 %rem
}

; Signed 128-bit remainder by the constant 8589934592 (2^33).
;
; The expected PTX is branch-free: the sign mask of the high word is shifted
; right by 31 to give a 33-bit bias (non-zero only for negative inputs), the
; bias is added to the dividend with carry, the low 33 bits of that sum are
; cleared, and the result is subtracted from the original dividend.
;
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define i128 @srem_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: srem_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<10>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd1, %rd2}, [srem_i128_pow2k_param_0];
; CHECK-NEXT:    shr.s64 %rd3, %rd2, 63;
; CHECK-NEXT:    shr.u64 %rd4, %rd3, 31;
; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, %rd4;
; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, 0;
; CHECK-NEXT:    and.b64 %rd7, %rd5, -8589934592;
; CHECK-NEXT:    sub.cc.s64 %rd8, %rd1, %rd7;
; CHECK-NEXT:    subc.cc.s64 %rd9, %rd2, %rd6;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd8, %rd9};
; CHECK-NEXT:    ret;
  %rem = srem i128 %lhs, 8589934592
  ret i128 %rem
}

; Unsigned 128-bit remainder by the constant 8589934592 (2^33).
;
; The expected PTX reduces this to a single mask of the low word with
; 8589934591 (2^33 - 1); the high word of the result is the constant 0.
;
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define i128 @urem_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: urem_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd1, %rd2}, [urem_i128_pow2k_param_0];
; CHECK-NEXT:    and.b64 %rd3, %rd1, 8589934591;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd3, 0};
; CHECK-NEXT:    ret;
  %rem = urem i128 %lhs, 8589934592
  ret i128 %rem
}

; Signed 128-bit division with a variable divisor.
;
; The expected PTX below has no call: the operation is expanded inline.
;   * Both operands are replaced by their absolute values (negate + selp on
;     the sign of the high word).
;   * A sign mask for the result is derived from the XOR of the two operands'
;     high words, arithmetically shifted right by 63.
;   * The quotient is forced to 0 when either magnitude is zero or when the
;     leading-zero-count difference exceeds 127; otherwise it is produced by
;     the shift-and-subtract loop in the udiv-do-while block.
;   * In udiv-end the unsigned quotient is conditionally negated with that
;     sign mask (xor, then subtract with borrow).
;
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: sdiv_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<22>;
; CHECK-NEXT:    .reg .b32 %r<12>;
; CHECK-NEXT:    .reg .b64 %rd<121>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.b64 {%rd45, %rd46}, [sdiv_i128_param_0];
; CHECK-NEXT:    ld.param.v2.b64 {%rd49, %rd50}, [sdiv_i128_param_1];
; CHECK-NEXT:    sub.cc.s64 %rd51, 0, %rd45;
; CHECK-NEXT:    subc.cc.s64 %rd52, 0, %rd46;
; CHECK-NEXT:    setp.lt.s64 %p1, %rd46, 0;
; CHECK-NEXT:    selp.b64 %rd2, %rd52, %rd46, %p1;
; CHECK-NEXT:    selp.b64 %rd1, %rd51, %rd45, %p1;
; CHECK-NEXT:    sub.cc.s64 %rd53, 0, %rd49;
; CHECK-NEXT:    subc.cc.s64 %rd54, 0, %rd50;
; CHECK-NEXT:    setp.lt.s64 %p2, %rd50, 0;
; CHECK-NEXT:    selp.b64 %rd4, %rd54, %rd50, %p2;
; CHECK-NEXT:    selp.b64 %rd3, %rd53, %rd49, %p2;
; CHECK-NEXT:    xor.b64 %rd55, %rd50, %rd46;
; CHECK-NEXT:    shr.s64 %rd5, %rd55, 63;
; CHECK-NEXT:    or.b64 %rd56, %rd3, %rd4;
; CHECK-NEXT:    setp.eq.b64 %p3, %rd56, 0;
; CHECK-NEXT:    or.b64 %rd57, %rd1, %rd2;
; CHECK-NEXT:    setp.eq.b64 %p4, %rd57, 0;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    setp.ne.b64 %p6, %rd4, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd4;
; CHECK-NEXT:    cvt.u64.u32 %rd58, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd3;
; CHECK-NEXT:    cvt.u64.u32 %rd59, %r2;
; CHECK-NEXT:    add.s64 %rd60, %rd59, 64;
; CHECK-NEXT:    selp.b64 %rd61, %rd58, %rd60, %p6;
; CHECK-NEXT:    setp.ne.b64 %p7, %rd2, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd2;
; CHECK-NEXT:    cvt.u64.u32 %rd62, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd1;
; CHECK-NEXT:    cvt.u64.u32 %rd63, %r4;
; CHECK-NEXT:    add.s64 %rd64, %rd63, 64;
; CHECK-NEXT:    selp.b64 %rd65, %rd62, %rd64, %p7;
; CHECK-NEXT:    mov.b64 %rd111, 0;
; CHECK-NEXT:    sub.cc.s64 %rd67, %rd61, %rd65;
; CHECK-NEXT:    subc.cc.s64 %rd8, %rd111, 0;
; CHECK-NEXT:    setp.ne.b64 %p8, %rd8, 0;
; CHECK-NEXT:    and.pred %p10, %p8, %p8;
; CHECK-NEXT:    setp.eq.b64 %p11, %rd8, 0;
; CHECK-NEXT:    setp.gt.u64 %p12, %rd67, 127;
; CHECK-NEXT:    and.pred %p13, %p11, %p12;
; CHECK-NEXT:    or.pred %p14, %p13, %p10;
; CHECK-NEXT:    or.pred %p15, %p5, %p14;
; CHECK-NEXT:    xor.b64 %rd68, %rd67, 127;
; CHECK-NEXT:    or.b64 %rd69, %rd68, %rd8;
; CHECK-NEXT:    setp.eq.b64 %p16, %rd69, 0;
; CHECK-NEXT:    selp.b64 %rd120, 0, %rd2, %p15;
; CHECK-NEXT:    selp.b64 %rd119, 0, %rd1, %p15;
; CHECK-NEXT:    or.pred %p17, %p15, %p16;
; CHECK-NEXT:    @%p17 bra $L__BB4_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd113, %rd67, 1;
; CHECK-NEXT:    addc.cc.s64 %rd114, %rd8, 0;
; CHECK-NEXT:    or.b64 %rd72, %rd113, %rd114;
; CHECK-NEXT:    setp.eq.b64 %p18, %rd72, 0;
; CHECK-NEXT:    cvt.u32.u64 %r5, %rd67;
; CHECK-NEXT:    sub.s32 %r6, 127, %r5;
; CHECK-NEXT:    shl.b64 %rd73, %rd2, %r6;
; CHECK-NEXT:    sub.s32 %r7, 64, %r6;
; CHECK-NEXT:    shr.u64 %rd74, %rd1, %r7;
; CHECK-NEXT:    or.b64 %rd75, %rd73, %rd74;
; CHECK-NEXT:    sub.s32 %r8, 63, %r5;
; CHECK-NEXT:    shl.b64 %rd76, %rd1, %r8;
; CHECK-NEXT:    setp.gt.s32 %p19, %r6, 63;
; CHECK-NEXT:    selp.b64 %rd118, %rd76, %rd75, %p19;
; CHECK-NEXT:    shl.b64 %rd117, %rd1, %r6;
; CHECK-NEXT:    mov.b64 %rd108, %rd111;
; CHECK-NEXT:    @%p18 bra $L__BB4_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd113;
; CHECK-NEXT:    shr.u64 %rd79, %rd1, %r9;
; CHECK-NEXT:    sub.s32 %r10, 64, %r9;
; CHECK-NEXT:    shl.b64 %rd80, %rd2, %r10;
; CHECK-NEXT:    or.b64 %rd81, %rd79, %rd80;
; CHECK-NEXT:    add.s32 %r11, %r9, -64;
; CHECK-NEXT:    shr.u64 %rd82, %rd2, %r11;
; CHECK-NEXT:    setp.gt.s32 %p20, %r9, 63;
; CHECK-NEXT:    selp.b64 %rd115, %rd82, %rd81, %p20;
; CHECK-NEXT:    shr.u64 %rd116, %rd2, %r9;
; CHECK-NEXT:    add.cc.s64 %rd35, %rd3, -1;
; CHECK-NEXT:    addc.cc.s64 %rd36, %rd4, -1;
; CHECK-NEXT:    mov.b64 %rd108, 0;
; CHECK-NEXT:    mov.b64 %rd111, %rd108;
; CHECK-NEXT:  $L__BB4_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd83, %rd115, 63;
; CHECK-NEXT:    shl.b64 %rd84, %rd116, 1;
; CHECK-NEXT:    or.b64 %rd85, %rd84, %rd83;
; CHECK-NEXT:    shl.b64 %rd86, %rd115, 1;
; CHECK-NEXT:    shr.u64 %rd87, %rd118, 63;
; CHECK-NEXT:    or.b64 %rd88, %rd86, %rd87;
; CHECK-NEXT:    shr.u64 %rd89, %rd117, 63;
; CHECK-NEXT:    shl.b64 %rd90, %rd118, 1;
; CHECK-NEXT:    or.b64 %rd91, %rd90, %rd89;
; CHECK-NEXT:    shl.b64 %rd92, %rd117, 1;
; CHECK-NEXT:    or.b64 %rd117, %rd111, %rd92;
; CHECK-NEXT:    or.b64 %rd118, %rd108, %rd91;
; CHECK-NEXT:    sub.cc.s64 %rd93, %rd35, %rd88;
; CHECK-NEXT:    subc.cc.s64 %rd94, %rd36, %rd85;
; CHECK-NEXT:    shr.s64 %rd95, %rd94, 63;
; CHECK-NEXT:    and.b64 %rd111, %rd95, 1;
; CHECK-NEXT:    and.b64 %rd96, %rd95, %rd3;
; CHECK-NEXT:    and.b64 %rd97, %rd95, %rd4;
; CHECK-NEXT:    sub.cc.s64 %rd115, %rd88, %rd96;
; CHECK-NEXT:    subc.cc.s64 %rd116, %rd85, %rd97;
; CHECK-NEXT:    add.cc.s64 %rd113, %rd113, -1;
; CHECK-NEXT:    addc.cc.s64 %rd114, %rd114, -1;
; CHECK-NEXT:    or.b64 %rd98, %rd113, %rd114;
; CHECK-NEXT:    setp.eq.b64 %p21, %rd98, 0;
; CHECK-NEXT:    @%p21 bra $L__BB4_4;
; CHECK-NEXT:    bra.uni $L__BB4_2;
; CHECK-NEXT:  $L__BB4_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd99, %rd117, 63;
; CHECK-NEXT:    shl.b64 %rd100, %rd118, 1;
; CHECK-NEXT:    or.b64 %rd101, %rd100, %rd99;
; CHECK-NEXT:    shl.b64 %rd102, %rd117, 1;
; CHECK-NEXT:    or.b64 %rd119, %rd111, %rd102;
; CHECK-NEXT:    or.b64 %rd120, %rd108, %rd101;
; CHECK-NEXT:  $L__BB4_5: // %udiv-end
; CHECK-NEXT:    xor.b64 %rd103, %rd119, %rd5;
; CHECK-NEXT:    xor.b64 %rd104, %rd120, %rd5;
; CHECK-NEXT:    sub.cc.s64 %rd105, %rd103, %rd5;
; CHECK-NEXT:    subc.cc.s64 %rd106, %rd104, %rd5;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd105, %rd106};
; CHECK-NEXT:    ret;
  %div = sdiv i128 %lhs, %rhs
  ret i128 %div
}

; Unsigned 128-bit division with a variable divisor.
;
; The expected PTX below has no call: the operation is expanded inline.
;   * The quotient is forced to 0 when either operand is zero or when the
;     leading-zero-count difference exceeds 127; otherwise it is produced by
;     the shift-and-subtract loop in the udiv-do-while block.
;   * udiv-end stores the quotient directly, with no further fix-up.
;
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: udiv_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<18>;
; CHECK-NEXT:    .reg .b32 %r<12>;
; CHECK-NEXT:    .reg .b64 %rd<105>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
; CHECK-NEXT:    ld.param.v2.b64 {%rd41, %rd42}, [udiv_i128_param_0];
; CHECK-NEXT:    ld.param.v2.b64 {%rd43, %rd44}, [udiv_i128_param_1];
; CHECK-NEXT:    or.b64 %rd45, %rd43, %rd44;
; CHECK-NEXT:    setp.eq.b64 %p1, %rd45, 0;
; CHECK-NEXT:    or.b64 %rd46, %rd41, %rd42;
; CHECK-NEXT:    setp.eq.b64 %p2, %rd46, 0;
; CHECK-NEXT:    or.pred %p3, %p1, %p2;
; CHECK-NEXT:    setp.ne.b64 %p4, %rd44, 0;
; CHECK-NEXT:    clz.b64 %r1, %rd44;
; CHECK-NEXT:    cvt.u64.u32 %rd47, %r1;
; CHECK-NEXT:    clz.b64 %r2, %rd43;
; CHECK-NEXT:    cvt.u64.u32 %rd48, %r2;
; CHECK-NEXT:    add.s64 %rd49, %rd48, 64;
; CHECK-NEXT:    selp.b64 %rd50, %rd47, %rd49, %p4;
; CHECK-NEXT:    setp.ne.b64 %p5, %rd42, 0;
; CHECK-NEXT:    clz.b64 %r3, %rd42;
; CHECK-NEXT:    cvt.u64.u32 %rd51, %r3;
; CHECK-NEXT:    clz.b64 %r4, %rd41;
; CHECK-NEXT:    cvt.u64.u32 %rd52, %r4;
; CHECK-NEXT:    add.s64 %rd53, %rd52, 64;
; CHECK-NEXT:    selp.b64 %rd54, %rd51, %rd53, %p5;
; CHECK-NEXT:    mov.b64 %rd95, 0;
; CHECK-NEXT:    sub.cc.s64 %rd5, %rd50, %rd54;
; CHECK-NEXT:    subc.cc.s64 %rd6, %rd95, 0;
; CHECK-NEXT:    setp.gt.u64 %p6, %rd5, 127;
; CHECK-NEXT:    setp.eq.b64 %p7, %rd6, 0;
; CHECK-NEXT:    and.pred %p8, %p7, %p6;
; CHECK-NEXT:    setp.ne.b64 %p9, %rd6, 0;
; CHECK-NEXT:    or.pred %p10, %p8, %p9;
; CHECK-NEXT:    or.pred %p11, %p3, %p10;
; CHECK-NEXT:    xor.b64 %rd56, %rd5, 127;
; CHECK-NEXT:    or.b64 %rd57, %rd56, %rd6;
; CHECK-NEXT:    setp.eq.b64 %p12, %rd57, 0;
; CHECK-NEXT:    selp.b64 %rd104, 0, %rd42, %p11;
; CHECK-NEXT:    selp.b64 %rd103, 0, %rd41, %p11;
; CHECK-NEXT:    or.pred %p13, %p11, %p12;
; CHECK-NEXT:    @%p13 bra $L__BB5_5;
; CHECK-NEXT:  // %bb.3: // %udiv-bb1
; CHECK-NEXT:    add.cc.s64 %rd97, %rd5, 1;
; CHECK-NEXT:    addc.cc.s64 %rd98, %rd6, 0;
; CHECK-NEXT:    or.b64 %rd60, %rd97, %rd98;
; CHECK-NEXT:    setp.eq.b64 %p14, %rd60, 0;
; CHECK-NEXT:    cvt.u32.u64 %r5, %rd5;
; CHECK-NEXT:    sub.s32 %r6, 127, %r5;
; CHECK-NEXT:    shl.b64 %rd61, %rd42, %r6;
; CHECK-NEXT:    sub.s32 %r7, 64, %r6;
; CHECK-NEXT:    shr.u64 %rd62, %rd41, %r7;
; CHECK-NEXT:    or.b64 %rd63, %rd61, %rd62;
; CHECK-NEXT:    sub.s32 %r8, 63, %r5;
; CHECK-NEXT:    shl.b64 %rd64, %rd41, %r8;
; CHECK-NEXT:    setp.gt.s32 %p15, %r6, 63;
; CHECK-NEXT:    selp.b64 %rd102, %rd64, %rd63, %p15;
; CHECK-NEXT:    shl.b64 %rd101, %rd41, %r6;
; CHECK-NEXT:    mov.b64 %rd92, %rd95;
; CHECK-NEXT:    @%p14 bra $L__BB5_4;
; CHECK-NEXT:  // %bb.1: // %udiv-preheader
; CHECK-NEXT:    cvt.u32.u64 %r9, %rd97;
; CHECK-NEXT:    shr.u64 %rd67, %rd41, %r9;
; CHECK-NEXT:    sub.s32 %r10, 64, %r9;
; CHECK-NEXT:    shl.b64 %rd68, %rd42, %r10;
; CHECK-NEXT:    or.b64 %rd69, %rd67, %rd68;
; CHECK-NEXT:    add.s32 %r11, %r9, -64;
; CHECK-NEXT:    shr.u64 %rd70, %rd42, %r11;
; CHECK-NEXT:    setp.gt.s32 %p16, %r9, 63;
; CHECK-NEXT:    selp.b64 %rd99, %rd70, %rd69, %p16;
; CHECK-NEXT:    shr.u64 %rd100, %rd42, %r9;
; CHECK-NEXT:    add.cc.s64 %rd33, %rd43, -1;
; CHECK-NEXT:    addc.cc.s64 %rd34, %rd44, -1;
; CHECK-NEXT:    mov.b64 %rd92, 0;
; CHECK-NEXT:    mov.b64 %rd95, %rd92;
; CHECK-NEXT:  $L__BB5_2: // %udiv-do-while
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    shr.u64 %rd71, %rd99, 63;
; CHECK-NEXT:    shl.b64 %rd72, %rd100, 1;
; CHECK-NEXT:    or.b64 %rd73, %rd72, %rd71;
; CHECK-NEXT:    shl.b64 %rd74, %rd99, 1;
; CHECK-NEXT:    shr.u64 %rd75, %rd102, 63;
; CHECK-NEXT:    or.b64 %rd76, %rd74, %rd75;
; CHECK-NEXT:    shr.u64 %rd77, %rd101, 63;
; CHECK-NEXT:    shl.b64 %rd78, %rd102, 1;
; CHECK-NEXT:    or.b64 %rd79, %rd78, %rd77;
; CHECK-NEXT:    shl.b64 %rd80, %rd101, 1;
; CHECK-NEXT:    or.b64 %rd101, %rd95, %rd80;
; CHECK-NEXT:    or.b64 %rd102, %rd92, %rd79;
; CHECK-NEXT:    sub.cc.s64 %rd81, %rd33, %rd76;
; CHECK-NEXT:    subc.cc.s64 %rd82, %rd34, %rd73;
; CHECK-NEXT:    shr.s64 %rd83, %rd82, 63;
; CHECK-NEXT:    and.b64 %rd95, %rd83, 1;
; CHECK-NEXT:    and.b64 %rd84, %rd83, %rd43;
; CHECK-NEXT:    and.b64 %rd85, %rd83, %rd44;
; CHECK-NEXT:    sub.cc.s64 %rd99, %rd76, %rd84;
; CHECK-NEXT:    subc.cc.s64 %rd100, %rd73, %rd85;
; CHECK-NEXT:    add.cc.s64 %rd97, %rd97, -1;
; CHECK-NEXT:    addc.cc.s64 %rd98, %rd98, -1;
; CHECK-NEXT:    or.b64 %rd86, %rd97, %rd98;
; CHECK-NEXT:    setp.eq.b64 %p17, %rd86, 0;
; CHECK-NEXT:    @%p17 bra $L__BB5_4;
; CHECK-NEXT:    bra.uni $L__BB5_2;
; CHECK-NEXT:  $L__BB5_4: // %udiv-loop-exit
; CHECK-NEXT:    shr.u64 %rd87, %rd101, 63;
; CHECK-NEXT:    shl.b64 %rd88, %rd102, 1;
; CHECK-NEXT:    or.b64 %rd89, %rd88, %rd87;
; CHECK-NEXT:    shl.b64 %rd90, %rd101, 1;
; CHECK-NEXT:    or.b64 %rd103, %rd95, %rd90;
; CHECK-NEXT:    or.b64 %rd104, %rd92, %rd89;
; CHECK-NEXT:  $L__BB5_5: // %udiv-end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd103, %rd104};
; CHECK-NEXT:    ret;
  %div = udiv i128 %lhs, %rhs
  ret i128 %div
}

; Signed 128-bit division by the constant 8589934592 (2^33).
;
; The expected PTX is branch-free: the sign mask of the high word is shifted
; right by 31 to give a 33-bit bias (non-zero only for negative inputs), the
; bias is added to the dividend with carry, and the 128-bit sum is shifted
; right by 33 (logical shift on the low word merged with bits from the high
; word, arithmetic shift on the high word).
;
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define i128 @sdiv_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: sdiv_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<11>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd1, %rd2}, [sdiv_i128_pow2k_param_0];
; CHECK-NEXT:    shr.s64 %rd3, %rd2, 63;
; CHECK-NEXT:    shr.u64 %rd4, %rd3, 31;
; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, %rd4;
; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, 0;
; CHECK-NEXT:    shl.b64 %rd7, %rd6, 31;
; CHECK-NEXT:    shr.u64 %rd8, %rd5, 33;
; CHECK-NEXT:    or.b64 %rd9, %rd8, %rd7;
; CHECK-NEXT:    shr.s64 %rd10, %rd6, 33;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd9, %rd10};
; CHECK-NEXT:    ret;
  %div = sdiv i128 %lhs, 8589934592
  ret i128 %div
}

; Unsigned 128-bit division by the constant 8589934592 (2^33).
;
; The expected PTX reduces this to a 128-bit logical shift right by 33 built
; from 64-bit pieces: the low result word combines the low input word shifted
; right by 33 with the high input word shifted left by 31, and the high result
; word is the high input word shifted right by 33.
;
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define i128 @udiv_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: udiv_i128_pow2k(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd1, %rd2}, [udiv_i128_pow2k_param_0];
; CHECK-NEXT:    shl.b64 %rd3, %rd2, 31;
; CHECK-NEXT:    shr.u64 %rd4, %rd1, 33;
; CHECK-NEXT:    or.b64 %rd5, %rd4, %rd3;
; CHECK-NEXT:    shr.u64 %rd6, %rd2, 33;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd5, %rd6};
; CHECK-NEXT:    ret;
  %div = udiv i128 %lhs, 8589934592
  ret i128 %div
}

; 128-bit addition of two variable operands.
;
; The expected PTX loads each operand as a pair of 64-bit words and adds them
; with a two-instruction carry chain: add.cc on the low words, then addc.cc on
; the high words.
;
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define i128 @add_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: add_i128(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd1, %rd2}, [add_i128_param_0];
; CHECK-NEXT:    ld.param.v2.b64 {%rd3, %rd4}, [add_i128_param_1];
; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, %rd3;
; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, %rd4;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd5, %rd6};
; CHECK-NEXT:    ret;
  %result = add i128 %lhs, %rhs
  ret i128 %result
}
