--- support.S	2020-05-31 15:02:07.000000000 -0500
+++ sys/powerpc/powerpc/support.S	2020-05-31 22:52:49.487251000 -0500
@@ -46,6 +46,7 @@
 #ifdef __powerpc64__
 #define	LOAD	ld
 #define	STORE	std
+#define	COMPARE	cmpdi
 #define	WORD	8
 /* log_2(8 * WORD) */
 #define	LOOP_LOG	6
@@ -53,6 +54,7 @@
 #define	LOAD	lwz
 #define	STORE	stw
 #define	WORD	4
+#define	COMPARE	cmpwi
 /* log_2(8 * WORD) */
 #define	LOOP_LOG	5
 #endif
@@ -158,6 +160,35 @@
 #define	Thresh	64
 
 .text
+
+.Lfailsafe:
+	/* Dumb byte copy version */
+	cmplwi	0, %r5, 0
+	beq	.Lend
+	mtctr	rl
+#ifndef __powerpc64__
+	cmpl	0, 0, rs, rd
+#else
+	cmpl	0, 1, rs, rd
+#endif
+	bgt	2f		/* src > dst, forward copy possible */
+	add	rs, rs, rl
+	add	rd, rd, rl
+1:
+	addi	rs, rs, -1
+	lbz	t1, 0(rs)
+	addi	rd, rd, -1
+	stb	t1, 0(rd)
+	bdnz	1b
+	b	.Lend
+2:
+	lbz	t1, 0(rs)
+	addi	rs, rs, 1
+	stb	t1, 0(rd)
+	addi	rd, rd, 1
+	bdnz	2b
+	b	.Lend
+
 ENTRY(bcopy_generic)
 	cmplwi	0, %r5, 0
 	beq	.Lend
@@ -166,15 +197,16 @@ ENTRY(bcopy_generic)
 	cmplwi	rl, Thresh
 	sub	t1, rs, rd
 	cmplw	cr1, t1, rl	// (dst-src) < len
+	blt-	cr1, .Lfailsafe
 	blt	.Lsmall
 	b	.Llarge
 /* memcpy */
 /* ... */
 .Lsmall:
-	mtcrf	0x3, rl
-//	blt	cr1, .Lsmallrev
+	mtcrf	0x3, rl		// load LSB 8 bits
+
 .Lsmall64:
-	bf	26, 0f
+	bf	26, .Lsmall32	// <32 bytes remain
 	LOAD	t1, 0(rs)
 	LOAD	t2, WORD*1(rs)
 	LOAD	t3, WORD*2(rs)
@@ -197,8 +229,8 @@ ENTRY(bcopy_generic)
 	STORE	t8, WORD*7(rd)
 #endif
 	addi	rd, rd, 32
-0:
-	bf	27, 1f
+.Lsmall32:
+	bf	27, 1f		// <16 bytes remain
 	LOAD	t1, 0(rs)
 	LOAD	t2, WORD*1(rs)
 #ifndef __powerpc64__
@@ -207,14 +239,14 @@ ENTRY(bcopy_generic)
 #endif
 	addi	rs, rs, 16
 	STORE	t1, 0(rd)
-	STORE	t2, 8(rd)
+	STORE	t2, WORD*1(rd)
 #ifndef __powerpc64__
 	STORE	t3, WORD*2(rd)
 	STORE	t4, WORD*3(rd)
 #endif
 	addi	rd, rd, 16
 1:
-	bf	28, 2f
+	bf	28, 2f		// <8 bytes remain
 	LOAD	t1, 0(rs)
 #ifndef __powerpc64__
 	LOAD	t2, WORD(rs)
@@ -222,23 +254,23 @@ ENTRY(bcopy_generic)
 	addi	rs, rs, 8
 	STORE	t1, 0(rd)
 #ifndef __powerpc64__
-	LOAD	t2, WORD(rd)
+	STORE	t2, WORD(rd)
 #endif
 	addi	rd, rd, 8
 2:
-	bf	29, 3f
+	bf	29, 3f		// < 4 bytes remain
 	lwz	t1, 0(rs)
 	addi	rs, rs, 4
 	stw	t1, 0(rd)
 	addi	rd, rd, 4
 3:
-	bf	30, 4f
+	bf	30, 4f		// < 2 bytes remain
 	lhz	t1, 0(rs)
 	addi	rs, rs, 2
 	sth	t1, 0(rd)
 	addi	rd, rd, 2
 4:
-	bf	31, .Lout
+	bf	31, .Lout	// 0 bytes remain
 	lbz	t1, 0(rs)
 	addi	rs, rs, 1
 	stb	t1, 0(rd)
@@ -246,13 +278,12 @@ ENTRY(bcopy_generic)
 	b	.Lout
 
 	.align 4
-.Llarge:
-	neg	t3, rd
+.Llarge:			// Preamble - byte copy until dest dword aligned.
+	neg	t3, rd		// NOT(rd) + 1
 	andi.	t6, t3, 0x7
-//	blt	cr1, .Llargerev
 	mtctr	t6
 	sub	rl, rl, t6
-	beq+	.Llargealigned
+	beq+	.Llargealigned	// Already aligned.
 1:
 	lbz	t1, 0(rs)
 	addi	rs, rs, 1
@@ -261,10 +292,14 @@ ENTRY(bcopy_generic)
 	bdnz	1b
 
 .Llargealigned:
-	srwi.	t2, rl, LOOP_LOG /* length >> log_2(loop_size) => 64B iterations */
+	srwi.	t2, rl, LOOP_LOG /* length >> log_2(loop_size) => 64B (32B) iterations */
 	mtcrf	0x3, rl
-	beq	.Lsmall64
-	mtctr	t2
+#ifndef __powerpc64__
+	beq	.Lsmall32
+#else
+	beq	.Lsmall64	/* i.e. would be on the last loop */
+#endif
+	mtctr	t2		/* 64 byte (32 byte) blocks */
 	b	1f
 
 	.align 5
@@ -289,7 +324,16 @@ ENTRY(bcopy_generic)
 	addi	rd, rd, WORD*8
 	bdnz	1b
 
+#ifndef __powerpc64__
+	/*
+	 * Ensure CR bit 26 is clear on ppc32, to ensure .Lsmall64 does not
+	 * copy more than 32 bytes.
+	 */
+	//crclr	26
+	b	.Lsmall32
+#else
 	b	.Lsmall64
+#endif
 
 .Lout:	/* done */
 .Lend:
@@ -349,14 +393,14 @@ ENTRY_DIRECT(copyinstr)
 	stbu	%r0, 1(%r4)	// NULL byte reached ?
-	cmpdi	%r0, 0
+	COMPARE	%r0, 0
 	beq-	1f
 	b	0b
 1:
 	li	%r3, 0
 2:
 	/* skip storing length if done is NULL */
-	cmpdi	%r6, 0
+	COMPARE	%r6, 0
 	beq-	3f
 	mfctr	%r0
 	sub	%r0, %r9, %r0
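
For readers not fluent in PowerPC assembly, the new .Lfailsafe path above is a
direction-aware byte-at-a-time copy: compare src and dst, copy forward when
src > dst, otherwise copy from the end downward so overlapping bytes are read
before they are overwritten. A minimal C sketch of that idea follows; the name
bcopy_bytewise is made up for illustration and is not part of the patch or the
kernel API.

	#include <stddef.h>

	/*
	 * Hypothetical illustration of the .Lfailsafe fallback: a dumb
	 * byte copy that walks backwards when the destination may start
	 * inside the source region.
	 */
	static void
	bcopy_bytewise(const void *src, void *dst, size_t len)
	{
		const unsigned char *s = src;
		unsigned char *d = dst;

		if (len == 0)
			return;
		if (s > d) {
			/* dst is below src, so a forward copy is safe. */
			while (len--)
				*d++ = *s++;
		} else {
			/* Copy from the end so overlapping bytes survive. */
			s += len;
			d += len;
			while (len--)
				*--d = *--s;
		}
	}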