Announcement

Collapse
No announcement yet.

Why are graphics as complicated as they are?

Collapse
X
 
  • Filter
  • Time
  • Show
Clear All
new posts

  • #31
    Originally posted by gens View Post
    please don't, just don't talk about audio processing
    it's an area with many, many people who don't know about it (and some are "smart" nonetheless, that's why i don't call myself an audiophile)
    here, a low pass FIR filter
    i think it's 32 tap, can't remember

    Code:
    ;-----------------------------------------------------------------------
    ; Stereo 32-tap FIR filter: stdin -> stdout.
    ;
    ; Syntax:  flat assembler (fasm), Intel operand order
    ; Target:  Linux x86-64, static ELF executable, raw syscalls (no libc)
    ; Data:    interleaved 16-bit samples, left word first, then right word
    ; Needs:   SSE2 only
    ;
    ; Register roles
    ;   xmm0..xmm3    left-channel history, 32 words, newest in word 0 of xmm0
    ;   xmm4..xmm7    right-channel history, same layout
    ;   xmm12..xmm15  the 32 coefficients (loaded once, never modified)
    ;   xmm8..xmm11   scratch
    ;   rax           byte offset of the current frame inside the block
    ;   rcx           frames left in the current block
    ;   r12           bytes of whole frames in the current block
    ;   r13           bytes of the current block already written
    ;   r14           raw byte count gathered for the current block
    ;
    ; Exit status: 0 on end of input, 1 on a read/write error.
    ;-----------------------------------------------------------------------
    format ELF64 executable
    entry start

    samples     equ 1024            ; stereo frames per block
    block_bytes equ samples*2*2     ; 2 channels * 2 bytes per sample

    sys_read    equ 0
    sys_write   equ 1
    sys_exit    equ 60

    align 16
    segment readable writeable

    align 16
    buff_in: rw samples*2
    align 16
    buff_out: rw samples*2

    ; Fixed point: 2^16 = 1.0. The taps of each table sum to 65538.
    align 16
    coefficients:

    dw 5
    dw 14
    dw 35
    dw 57
    dw 39
    dw -76
    dw -328
    dw -677
    dw -945
    dw -828
    dw 16
    dw 1799
    dw 4428
    dw 7441
    dw 10109
    dw 11680
    dw 11680
    dw 10109
    dw 7441
    dw 4428
    dw 1799
    dw 16
    dw -828
    dw -945
    dw -677
    dw -328
    dw -76
    dw 39
    dw 57
    dw 35
    dw 14
    dw 5

    ; Second 32-tap table. Nothing in this program references it;
    ; presumably an alternative response kept for experiments.
    align 16
    coefficients_alt:

    dw 705
    dw 1240
    dw 1434
    dw 1169
    dw 456
    dw -550
    dw -1576
    dw -2287
    dw -2367
    dw -1609
    dw 25
    dw 2377
    dw 5106
    dw 7760
    dw 9862
    dw 11024
    dw 11024
    dw 9862
    dw 7760
    dw 5106
    dw 2377
    dw 25
    dw -1609
    dw -2367
    dw -2287
    dw -1576
    dw -550
    dw 456
    dw 1169
    dw 1434
    dw 1240
    dw 705


    align 16
    segment readable executable

    start:

    ; Both delay lines start out silent.
    pxor xmm0, xmm0
    pxor xmm1, xmm1
    pxor xmm2, xmm2
    pxor xmm3, xmm3
    pxor xmm4, xmm4
    pxor xmm5, xmm5
    pxor xmm6, xmm6
    pxor xmm7, xmm7

    ; The taps never change, so load them once instead of once per sample.
    movdqa xmm12, [coefficients]
    movdqa xmm13, [coefficients+16*1]
    movdqa xmm14, [coefficients+16*2]
    movdqa xmm15, [coefficients+16*3]

    next_block:

    ; ---- gather up to one full block from stdin --------------------------
    ; read() may return fewer bytes than requested (pipes, end of file),
    ; so keep reading until the block is full or the input ends.
    xor r12d, r12d                  ; r12 = bytes gathered so far
    fill_input:
    mov edx, block_bytes
    sub rdx, r12                    ; room left in buff_in
    lea rsi, [buff_in+r12]
    xor edi, edi                    ; fd 0 = stdin
    mov eax, sys_read
    syscall                         ; clobbers rax, rcx, r11 only
    test rax, rax
    jz input_done                   ; 0 = end of input
    js io_error                     ; negative = -errno
    add r12, rax
    cmp r12, block_bytes
    jb fill_input

    input_done:
    mov r14, r12                    ; short count here means EOF was seen
    and r12, -4                     ; whole frames only; a trailing partial
                                    ; frame at EOF cannot be filtered
    jz finished

    ; ---- filter the block ------------------------------------------------
    mov rcx, r12
    shr rcx, 2                      ; rcx = number of stereo frames (> 0)
    xor eax, eax                    ; rax = byte offset of current frame

    sample_loop:
    mov edx, [buff_in+rax]          ; low word = left, high word = right
    mov esi, edx
    shr esi, 16                     ; esi = right sample, zero-extended
    movzx edx, dx                   ; edx = left sample, zero-extended

    ; Left delay line: shift the 32-word history up by one word across
    ; xmm3:xmm2:xmm1:xmm0, carrying the top word of each register into the
    ; next one, then drop the new sample into word 0 of xmm0.
    pslldq xmm3, 2
    movdqa xmm8, xmm2
    psrldq xmm8, 2*7
    por xmm3, xmm8

    pslldq xmm2, 2
    movdqa xmm8, xmm1
    psrldq xmm8, 2*7
    por xmm2, xmm8

    pslldq xmm1, 2
    movdqa xmm8, xmm0
    psrldq xmm8, 2*7
    por xmm1, xmm8

    pslldq xmm0, 2
    movd xmm8, edx
    por xmm0, xmm8

    ; Right delay line: same scheme across xmm7:xmm6:xmm5:xmm4.
    pslldq xmm7, 2
    movdqa xmm8, xmm6
    psrldq xmm8, 2*7
    por xmm7, xmm8

    pslldq xmm6, 2
    movdqa xmm8, xmm5
    psrldq xmm8, 2*7
    por xmm6, xmm8

    pslldq xmm5, 2
    movdqa xmm8, xmm4
    psrldq xmm8, 2*7
    por xmm5, xmm8

    pslldq xmm4, 2
    movd xmm8, esi
    por xmm4, xmm8

    ; Left output: 32 multiply-accumulates, then a horizontal sum.
    movdqa xmm8, xmm12
    movdqa xmm9, xmm13
    movdqa xmm10, xmm14
    movdqa xmm11, xmm15
    pmaddwd xmm8, xmm0
    pmaddwd xmm9, xmm1
    pmaddwd xmm10, xmm2
    pmaddwd xmm11, xmm3
    paddd xmm8, xmm10
    paddd xmm9, xmm11
    paddd xmm8, xmm9                ; four partial sums
    pshufd xmm9, xmm8, 0x4E         ; swap the 64-bit halves
    paddd xmm8, xmm9                ; two partial sums
    pshufd xmm9, xmm8, 0xB1         ; swap neighbouring dwords
    paddd xmm8, xmm9                ; dword 0 = sum of all 32 products
    psrad xmm8, 16                  ; 2^16 fixed point -> integer sample
    packssdw xmm8, xmm8             ; clip to int16 instead of wrapping
    movd r10d, xmm8
    mov [buff_out+rax], r10w

    ; Right output: identical arithmetic on the right-channel history.
    movdqa xmm8, xmm12
    movdqa xmm9, xmm13
    movdqa xmm10, xmm14
    movdqa xmm11, xmm15
    pmaddwd xmm8, xmm4
    pmaddwd xmm9, xmm5
    pmaddwd xmm10, xmm6
    pmaddwd xmm11, xmm7
    paddd xmm8, xmm10
    paddd xmm9, xmm11
    paddd xmm8, xmm9
    pshufd xmm9, xmm8, 0x4E
    paddd xmm8, xmm9
    pshufd xmm9, xmm8, 0xB1
    paddd xmm8, xmm9
    psrad xmm8, 16
    packssdw xmm8, xmm8
    movd r10d, xmm8
    mov [buff_out+rax+2], r10w

    add rax, 4
    dec rcx
    jnz sample_loop

    ; ---- write exactly the filtered bytes to stdout ----------------------
    ; write() may also be partial, so loop until all r12 bytes are out.
    xor r13d, r13d                  ; r13 = bytes written so far
    flush_output:
    mov rdx, r12
    sub rdx, r13
    lea rsi, [buff_out+r13]
    mov edi, 1                      ; fd 1 = stdout
    mov eax, sys_write
    syscall
    test rax, rax
    jle io_error                    ; error, or no progress at all
    add r13, rax
    cmp r13, r12
    jb flush_output

    cmp r14, block_bytes
    je next_block                   ; full block: more input may follow

    finished:
    xor edi, edi                    ; exit status 0
    jmp quit

    io_error:
    mov edi, 1                      ; exit status 1

    quit:
    mov eax, sys_exit
    syscall
    bash-4.2# time ./example < hell.raw >hell2.raw

    real 0m2.206s
    user 0m0.129s
    sys 0m0.251s

    where hell.raw is a 64MB, ~6.25 minutes long, 44100Hz, 16bit stereo
    and most of that time is spent in read() and write()
    I don't get your point, do you mean that audio processing is irrelevant since an assembly language low pass filter is spending most time in read/write and not processing?
    I might have misunderstood, but I understand if you dislike hardware audio "processing" because they do things you shouldn't do to audio streams, but that's not really what true audio and audio processing in games is about..
    The thing is that for one audio stream source, the best is to do *nothing* to it, but in a game, where you have up to several hundred audio streams that are triggered simultaneously, getting this represented correctly with respect to the player position and direction in a surround perspective, you kind of have to. And this probably requires quite a bit more processing time.
    So I think there are definitely benefits to doing this in hardware.

    Comment

    Working...
    X