; 1k boing - 1k amiga boing demo
; Released at Finnish Amiga Party 2013
; Copyright (C) Harry "Piru" Sintonen <sintonen@iki.fi>
;
; http://www.pouet.net/prod.php?which=62221
;
; Remarks
; - boingdata was generated by a small JavaScript code snippet which
;   does the packing to 2+1 bit per coord delta format. The actual
;   coordinate data for the ball was captured by patching the
;   original Boingdemo to dump the coordinates for the AreaMove +
;   AreaDraw calls.
; - The compressed scrolltext was generated by a small C
;   preprocessor.
; - The scrolling is far from perfect. The slowness can mostly be
;   attributed to AmigaOS deficiencies. While KS 2.x improved the
;   performance it is far too slow to achieve smooth 50 FPS.
;
	include "exec/memory.i"
	include "exec/execbase.i"
	include "graphics/rastport.i"
	include "graphics/gfxbase.i"
	include "intuition/screens.i"
	include "all_lvo.i"
	include "hardware/dmabits.i"
	include "hardware/custom.i"

; Base address the hardware/custom.i register offsets are added to.
_custom		EQU	$dff000

; Ball screen size in pixels; WID*HEI/8 is also the byte size of one bitplane.
WID		EQU	320
HEI		EQU	(256+10-40)
; Vector count passed to InitArea (s_areabuffer reserves 5 bytes per vector).
AREASIZE	EQU	5
; Byte size of the TmpRas buffer reserved right behind the work structure.
TMPRASSIZE	EQU	(64*64/8)
PLANESIZE	EQU	(WID*HEI/8*5)	; 1 extra plane for y-scroll
; Byte length of the audio sample (ac_len is programmed as SAMPLESIZE/2).
SAMPLESIZE	EQU	8

	; Work area at the start of the allocated block, addressed through a5.
	; The block is requested with MEMF_CLEAR, so every field starts as zero.
	STRUCTURE shit,0
	; library bases found by scanning exec's LibList
	APTR	s_intuibase
	APTR	s_gfxbase
	; advanced by 4 per frame: its upper byte is the rotate count for the
	; cycle colours, the word itself is the top screen's scroll offset
	UWORD	s_colorslide
	; two consecutive copies of the 14-word colour cycle pattern
	STRUCT	s_cyctab,14*2*2
	ALIGNLONG
	; NewScreen, reused for both OpenScreen calls
	STRUCT	s_ns,ns_SIZEOF
	ALIGNLONG
	; TextAttr referenced by ns_Font
	STRUCT	s_ta,ta_SIZEOF
	ALIGNLONG
	; custom BitMap of the ball screen
	STRUCT	s_bitmap,bm_SIZEOF
	ALIGNLONG
	APTR	s_topscreen
	APTR	s_ballscreen
	; AreaInfo / TmpRas attached to the ball screen's RastPort
	STRUCT	s_areainfo,ai_SIZEOF
	ALIGNLONG
	STRUCT	s_tmpras,tr_SIZEOF
	ALIGNLONG
	; one-character buffer for Text; shares its address with s_prevx
	LABEL	s_scrollbuf
	; previous point of the current row, then the four corners of the
	; quad being drawn (each x/y pair is also accessed as one longword)
	UWORD	s_prevx
	UWORD	s_prevy
	UWORD	s_x1
	UWORD	s_y1
	UWORD	s_x2
	UWORD	s_y2
	UWORD	s_x3
	UWORD	s_y3
	UWORD	s_x4
	UWORD	s_y4
	; points remembered from the previous row (two longwords per quad);
	; the audio sample is later written where the row cursor is left
	STRUCT	s_rowbuf,55*2*2*4
	ALIGNLONG
	; vector buffer handed to InitArea
	STRUCT	s_areabuffer,AREASIZE*5
	ALIGNLONG
	; 21 colour words handed to LoadRGB4
	STRUCT	s_colortab,2*21
	ALIGNLONG
	LABEL	shit_SIZEOF

	; Header: 'DOS',0 identifier followed by two longwords (presumably the
	; boot block layout -- TODO confirm).  The first longword is left 0
	; here; presumably it is the checksum slot patched by the tool that
	; writes the block.  The second one doubles as the AllocMem
	; requirements that main loads PC-relative.
	dc.b	'D','O','S',0
	dc.l	0
mflags:	dc.l	MEMF_CHIP|MEMF_CLEAR

; MMIN value,Dn
;
; Loads Dn with a constant that is at least <value>, using the shortest
; encoding available: moveq + lsl (4 bytes) rounds <value> up to the next
; of 256, 512, 1024, 2048, 4096, 8192, 16384, 32512 or 49152; anything
; above 49152 is loaded exactly with a 6-byte move.l.
;
; First argument:  assembly-time constant (the minimum wanted).
; Second argument: data register receiving the result.  It is also used as
;                  its own shift count in the 49152 tier (12 << 12).
;
; Callers must tolerate the rounded-up value.
;
; The two smallest tiers now load 2<<8 = 512 and 1<<8 = 256, matching their
; comments.  They used to load 768 and 512, which made InitTmpRas announce
; more bytes than TMPRASSIZE reserves for the buffer.
MMIN	MACRO
	IFGT (\1)-256
	IFGT (\1)-512
	IFGT (\1)-1024
	IFGT (\1)-2048
	IFGT (\1)-4096
	IFGT (\1)-8192
	IFGT (\1)-16384
	IFGT (\1)-32512
	IFGT (\1)-49152
	move.l	#\1,\2
	ELSE
	moveq	#12,\2
	lsl.l	\2,\2	; 49152
	ENDC
	ELSE
	moveq	#127,\2
	lsl.l	#8,\2	; 32512
	ENDC
	ELSE
	moveq	#64,\2
	lsl.l	#8,\2	; 16384
	ENDC
	ELSE
	moveq	#32,\2
	lsl.l	#8,\2	; 8192
	ENDC
	ELSE
	moveq	#16,\2
	lsl.l	#8,\2	; 4096
	ENDC
	ELSE
	moveq	#8,\2
	lsl.l	#8,\2	; 2048
	ENDC
	ELSE
	moveq	#4,\2
	lsl.l	#8,\2	; 1024
	ENDC
	ELSE
	moveq	#2,\2
	lsl.l	#8,\2	; 512
	ENDC
	ELSE
	moveq	#1,\2
	lsl.l	#8,\2	; 256
	ENDC
	ENDM

; Entry point.  a6 is used as the exec library base without being loaded, so
; it is expected to hold ExecBase on entry.
; Allocates one block holding the work structure, the TmpRas buffer and all
; bitplanes; the pointer stays in a5 for the rest of the program.
; NOTE: the AllocMem result is not checked.
main:
	MMIN	(shit_SIZEOF+TMPRASSIZE+PLANESIZE),d0
	move.l	(mflags,pc),d1
	jsr	(_LVOAllocMem,a6)
	move.l	d0,a5

	; Locate two library bases by walking exec's LibList instead of calling
	; OpenLibrary.  .openlib is entered once by falling through (the pea
	; supplies the return address for its rts) and once with bsr.
	;
	;  IN: d0.w = first two characters of the library name, a6 = ExecBase
	; OUT: a4   = first node whose name starts with those characters
	; INT: a0 is clobbered
	; NOTE: there is no end-of-list check, the library has to be present.
	move.w	#'in',d0
	pea	(.after_openlib,pc)
.openlib:
	lea	(LibList,a6),a4
.ol_loop:
	move.l	(a4),a4			; next node
	move.l	(LN_NAME,a4),a0
	cmp.w	(a0),d0			; compare the first two name bytes only
	bne.b	.ol_loop
	rts
.after_openlib:
	move.l	a4,(s_intuibase,a5)
	move.w	#'gr',d0
	bsr.b	.openlib
	move.l	a4,(s_gfxbase,a5)

	; Fill in the custom BitMap for the ball screen and hook it into the
	; NewScreen.  The four plane pointers start one plane size behind the
	; TmpRas buffer, leaving the first plane-sized area of the bitplane
	; memory in front of plane 0.
	lea	(s_bitmap,a5),a1
	move.l	a1,(s_ns+ns_CustomBitMap,a5)
	move.l	#((WID/8)<<16)|HEI,(a1)+	; bm_BytesPerRow, bm_Rows
	move.l	#((0)<<24)|((4)<<16),(a1)+	; bm_Flags, bm_Depth, bm_Pad
	lea	(shit_SIZEOF+TMPRASSIZE+(WID*HEI)/8,a5),a0
	moveq	#4-1,d0
.bmlop:	move.l	a0,(a1)+		; bm_Planes[n]
	lea	(WID*HEI/8,a0),a0	; advance by one plane
	dbf	d0,.bmlop

	; 1st screen with the text/scroll
	; Only the NewScreen fields that differ from zero are written (the
	; structure lives in cleared memory).  a2 walks through the NewScreen
	; and is reused when the second screen is set up; a4 still holds the
	; graphics base found by the library lookup.
	lea	(s_ns+ns_Width,a5),a2
SCROLLWID	EQU	4096
	move.l	#((WID+SCROLLWID)<<16)|22,(a2)+	; ns_Width, ns_Height (NOTE: less than 12 crashes 1.x)
	addq.w	#1,(a2)			; ns_Depth
	move.w	#CUSTOMSCREEN|SCREENQUIET,(s_ns+ns_Type,a5)
	lea	(s_ta,a5),a0
	move.l	a0,(s_ns+ns_Font,a5)
	move.l	(gb_DefaultFont,a4),a1
	move.l	(LN_NAME,a1),(a0)+	; 'topaz.font',0
	move.w	#9,(a0)			; the word following the name pointer

	; .openscreen: OpenScreen(s_ns) through the intuition base.
	; Entered by falling through here (return address supplied by the pea)
	; and with bsr for the second screen.
	; OUT: d0 = screen pointer, a6 = intuition base
	pea	(.after_openscreen,pc)
.openscreen:
	move.l	(s_intuibase,a5),a6
	lea	(s_ns,a5),a0
	jmp	(_LVOOpenScreen,a6)
.after_openscreen:
	move.l	d0,(s_topscreen,a5)

	; set the palette for the top screen
	move.l	d0,a0			;move.l	(s_topscreen,a5),a0
	; the colour table is still all zero, so this leaves $fffd in colour 1
	subq.w	#3,(s_colortab+2,a5)	;move.w	#$0dfe,(s_colortab+2,a5)
	bsr.b	.loadcolors_scr

	; 2nd screen with the ball
	; a2 still points at ns_Depth from the first screen's setup
	addq.w	#4-1,(a2)		; ns_Depth = 4
	move.l	#(WID<<16)|HEI,-(a2)	; ns_Width, ns_Height
	move.w	#40+12,-(a2)		; ns_TopEdge
	move.w	#CUSTOMSCREEN|SCREENQUIET|CUSTOMBITMAP,(s_ns+ns_Type,a5)
	bsr.b	.openscreen

	move.l	d0,(s_ballscreen,a5)
	move.l	d0,a4

	; black screen while drawing
	; .loadcolors:     LoadRGB4 of s_colortab (21 entries) into the ball screen
	; .loadcolors_scr: same, for the screen passed in a0
	; OUT: a6 = graphics base.  a0, a1 and d0 are overwritten.
	pea	(.after_loadcolors,pc)
.loadcolors:
	move.l	(s_ballscreen,a5),a0
.loadcolors_scr:
	move.l	(s_gfxbase,a5),a6

	lea	(sc_ViewPort,a0),a0
	lea	(s_colortab,a5),a1
	moveq	#21,d0
	jmp	(_LVOLoadRGB4,a6)
.after_loadcolors:

	; Prepare area filling on the ball screen's RastPort (a4 = ball screen,
	; a6 = graphics base as left by the palette load).
	lea	(s_areainfo,a5),a0
	move.l	a0,(sc_RastPort+rp_AreaInfo,a4)
	lea	(s_areabuffer,a5),a1
	moveq	#AREASIZE,d0
	jsr	(_LVOInitArea,a6)

	; the TmpRas buffer sits directly behind the work structure; its size
	; is loaded through MMIN, i.e. possibly rounded up
	lea	(s_tmpras,a5),a0
	move.l	a0,(sc_RastPort+rp_TmpRas,a4)
	lea	(shit_SIZEOF,a5),a1
	MMIN	TMPRASSIZE,d0
	;move.l	#TMPRASSIZE,d0
	jsr	(_LVOInitTmpRas,a6)

; Number of quads making up the ball: 8 rows of 55.
BOINGSIZE	EQU	(55*8)

	; Preset the row buffer: d6.b counts 55*2*4/2 = 220 longwords, written
	; as a negative moveq so the constant fits.  d6.b ends up 0, which the
	; bit reader below relies on as "no bits buffered".
	moveq	#-(256-(55*2*4/2)),d6
	lea	(s_rowbuf,a5),a0
.rinit:	move.l	#(71<<16)|5,(a0)+	; topmost coord
	subq.b	#1,d6
	bne.b	.rinit

	lea	(boingdata,pc),a3
	moveq	#-3,d4		; d4 = current color (initial -3+4+1 = 2)
	; d5 = current data byte
	; d6.b = number of bits left in data byte
	;moveq	#0,d6		; no bits yet (from the loop above)

	; d7 = BOINGSIZE, built with moveq + shift
	moveq	#BOINGSIZE/4,d7
	lsl.l	#2,d7
; Decode and draw the ball, one filled quad per iteration.
; Registers live across the loop:
;   a2 = cursor into s_rowbuf (points remembered from the previous row)
;   a3 = packed input, d5/d6 = bit reader state
;   d4 = current pen, d7 = quads left (BOINGSIZE down to 1)
;   a6 = graphics base
.oloop:
	move.l	d7,d0
	lea	(s_prevx,a5),a0
	divu.w	#55,d0		; low word = quotient, high word = remainder
	lea	(s_x1,a5),a4
	lsr.l	#4,d0		; quotient is max 8, so >>4 discards it: non-zero result = non-zero remainder
	bne.b	.notfirst

	; start of a row
	lea	(s_rowbuf,a5),a2
	addq.l	#4,d4		; shift row start color
	; the pea makes the rts of the first (fall-through) pass land on
	; .after_getbits
	pea	(.after_getbits,pc)
; extract bits from a bitstream
;
;  IN: d1.w = number of bits to get
; OUT: d0.w = value, masked
; INT: a3 = data ptr, d5 = current byte, d6 = number of bits left
; The upper word of d0 is left untouched; bits are taken MSB first.
.get7bits:
	moveq	#7,d1
.getbits:
	clr.w	d0
.gb_loop:
	subq.b	#1,d6
	bcc.b	.gb_nofetch
	move.b	(a3)+,d5	; get new byte
	addq.b	#8,d6		; we have 8 more bits now
.gb_nofetch:
;;	move.w	d5,$dff180
	add.b	d5,d5		; get 1 bit in carry
	addx.w	d0,d0		; get resulting value
	subq.b	#1,d1
	bne.b	.gb_loop
	rts
.after_getbits:

	; row start: two absolute 7-bit values, packed as one longword
	swap	d0		; x1 to upper 16 bits
	bsr.b	.get7bits
	move.l	d0,(a0)		; d0 -> s_prevx
	move.l	d0,(a4)		; d0 -> s_x1
	bra.b	.donefirst
.notfirst:
	; other points: 2-bit delta for the upper word, 1-bit for the lower
	moveq	#2,d1
	bsr.b	.getbits
	cmp.w	#BOINGSIZE-194,d7	; there's one location with delta of 4, so hardcode it
	bne.b	.skipmagic
	addq.w	#1,d0
.skipmagic:
	swap	d0
	moveq	#1,d1
	bsr.b	.getbits
	sub.l	d0,(a4)		; s_x1/s_y1 -= d0 (works coz there are no overflows)
.donefirst:
	; swap 1 and 2 points
	move.l	(a0),(s_x2,a5)	; s_prevx -> s_x2
	move.l	(a4),(a0)	; s_x1 -> s_prevx

	; fetch previous points & store for next round
	move.l	(a2),(s_x4,a5)
	move.l	(a4),(a2)+	; s_x1
	move.l	(a2),(s_x3,a5)
	move.l	(s_x2,a5),(a2)+	; s_x2

	; handle color
	; pens run from 2 to 14; anything beyond restarts at 2
	addq.l	#1,d4
	cmp.w	#15,d4
	bcs.b	.nocolorwrap
	moveq	#2,d4
.nocolorwrap:
	; Library calls go through .call/.callg/.callr with d3 = LVO+128; the
	; bias is removed again by the -128 displacement of the final jmp and
	; lets some of the offsets be loaded with moveq.
	move.b	d4,d0
	move.w	#_LVOSetAPen+128,d3
	bsr.b	.callg

	; draw the quad
	; AreaMove to point 1, AreaDraw to points 2..4, then AreaEnd
	moveq	#_LVOAreaMove+128,d3
	bsr.b	.call
	subq.w	#6,d3			; _LVOAreaDraw
	moveq	#3,d2
.dloop:	bsr.b	.call
	subq.b	#1,d2
	bne.b	.dloop
	subq.w	#6,d3			; _LVOAreaEnd
	; AreaEnd is reached by falling into .call, which loads one more
	; coordinate pair (presumably ignored by AreaEnd)
	pea	(.pastcall,pc)
; .call:  d0/d1 = next x/y pair from (a4)+, a1 = ball screen RastPort
; .callg: a1 = ball screen RastPort, d0/d1 as set by the caller
; .callr: a1 = screen in, RastPort of that screen out
; All three end in a jump to the library function selected by d3.
.call:	movem.w	(a4)+,d0/d1
.callg:	move.l	(s_ballscreen,a5),a1
.callr:	lea	(sc_RastPort,a1),a1
	jmp	(-128,a6,d3.w)
.pastcall:

	subq.w	#1,d7
	; NOTE(review): the loop body looks longer than a short branch can
	; span; presumably the assembler's branch optimisation widens it --
	; confirm.
	bne.b	.oloop

	; render the scroll text
	; The text is decoded one 6-bit character at a time from the same bit
	; stream (a3/d5/d6 continue where the ball data ended) and drawn with
	; Text into the top screen.
	;lea	(scroll,pc),a3	; a3 = scrolltext
	move.l	(s_topscreen,a5),a4
	move.l	#((WID+512+18)<<16)|(7+10),(sc_RastPort+rp_cp_x,a4)	; rp_cp_x and the word following it

	moveq	#-(256-SCROLLSIZE),d2	; d2.b = SCROLLSIZE characters to go
	;moveq	#0,d6		; d6 = 0 already from previous decode
.unpackscroll:
	moveq	#6,d1
	bsr.b	.getbits
	add.b	#' ',d0		; 6-bit code -> character, starting at ' '
	lea	(s_scrollbuf,a5),a0
	move.b	d0,(a0)
	moveq	#1,d0		; one character per call
	move.l	a4,a1
	moveq	#_LVOText+128,d3
	bsr.b	.callr
	addq.w	#2,(sc_RastPort+rp_cp_x,a4)	; 2 extra pixels between characters
	subq.b	#1,d2
	bne.b	.unpackscroll

	; main loop
	; Register use from here on:
	;   d3 = (xpos << 16) | ypos    d4 = audio volume
	;   d5 = xspeed                 d6 = yspeed
	;   d7 = colour cycle offset    a4 = _custom+aud0+ac_per

	moveq	#0,d3	; (xpos << 16 ) | ypos (d3 still holds the biased Text offset, so it has to be cleared)
	;moveq	#0,d4	; audio effect volume (already small value from draw loop)
	moveq	#2,d5	; xspeed (either -2 or 2)
	;moveq	#1,d5	; xspeed (either -1 or 1)
	moveq	#0,d6	; yspeed
	;moveq	#0,d7	; colorcycle pos (already 0 from above loop)

	; set up sine sample
	; NOTE: a2 = buffer in chip
	; (a2 is the row cursor left behind by the ball drawing loop, i.e. it
	; points into s_rowbuf)
	lea	_custom+aud0+ac_ptr,a4
	move.l	a2,(a4)+		; ac_ptr
	move.w	#SAMPLESIZE/2,(a4)+	; ac_len
	; the 8 sample bytes; a4 now points at ac_per
	move.l	#$005a7f5a,(a2)+
	move.l	#$00a681a6,(a2)
	move.w	#DMAF_SETCLR|DMAF_MASTER|DMAF_AUD0,(dmacon-(aud0+ac_per),a4)	; enable audio

; One frame: rebuild and load the ball palette, wait for the frame, move the
; ball, fade the sound, then scroll both screens.  Never exits.
.mainloop:
	; make palette cycle table
	; d0 holds two colour words that are rotated as one 32-bit value by the
	; upper byte of s_colorslide; s_colorslide itself advances by 4
	lea	(s_colorslide,a5),a0
	move.l	#$0faa0f00,d0
	move.b	(a0),d1
	addq.w	#4,(a0)+	; a0 = s_cyctab
	ror.l	d1,d0
	; two identical runs of 14 words: 1x upper word of d0, 7x lower word
	; of d0, 6x $ffff
	moveq	#2-1,d2
.mkcl:	move.l	d0,-(sp)	; saved because d0 serves as a counter below
	move.w	(sp),(a0)+	; upper word of d0
	moveq	#7-1,d1
.redl:	move.w	d0,(a0)+
	dbf	d1,.redl
	moveq	#6-1,d0
.whil:	move.w	d1,(a0)+	; d1 = 0xffff from above loop
	dbf	d0,.whil
	move.l	(sp)+,d0
	dbf	d2,.mkcl

	; handle colorcycling offset
	; d7 is a byte offset kept within 0..14*2-1; it moves by the x speed
	moveq	#14*2,d0
	sub.w	d5,d7
	bge.b	.cwp2
	add.w	d0,d7
.cwp2:	cmp.w	d0,d7
	blt.b	.cwp1
	sub.w	d0,d7
.cwp1:
	; generate current colortable
	; copies 14 colours (7 longwords) to colour 2 onwards
	lea	(s_cyctab,a5,d7.w),a0
	lea	(s_colortab+4,a5),a1	; NOTE: d0 = 14*2 from above!
.ccyc:	move.l	(a0)+,(a1)+
	subq.l	#4,d0
	bne.b	.ccyc

	bsr.b	.loadcolors	; leaves a6 = graphics base

	jsr	(_LVOWaitTOF,a6)

	; move ball in x and y

	; y movement first
	sub.w	d6,d3
	cmp.w	#-(HEI-10-105),d3
	bge.b	.yno
	bsr.b	.play		; play sample on bottom collision
	neg.w	d6
	bra.b	.yskip
.yno:	addq.w	#1,d6		; gravity
.yskip:
	; x movement
	; the position is kept negative: reaching 0 or passing -(WID-115)
	; reverses the direction
	swap	d3
	sub.w	d5,d3
	bpl.b	.xtog
	cmp.w	#-(WID-115),d3
	bge.b	.xno
.xtog:	bsr.b	.play		; play sample on sidewall collision
	neg.w	d5
.xno:	swap	d3

	; handle "dynamic" audio playback
	; write the volume, then let it decay by 2 per frame until it is 0
	move.w	d4,(ac_vol-ac_per,a4)
	beq.b	.volis0
	subq.w	#2,d4
;	subq.w	#1,d4
.volis0:

	; apply the ball position to the ball screen, then the text scroll
	; offset to the top screen
	move.l	(s_intuibase,a5),a6
	move.l	(s_ballscreen,a5),a0
	bsr.b	.scroll
	move.l	d3,-(sp)
	; longword read: s_colorslide lands in the upper word, the lower word
	; (first s_cyctab entry) is masked off again
	move.l	(s_colorslide,a5),d3
	;lsr.l	#1,d3
	and.l	#(SCROLLWID-1)<<16,d3
	move.l	(s_topscreen,a5),a0
	bsr.b	.scroll
	move.l	(sp)+,d3

	jsr	(_LVORethinkDisplay,a6)
	; NOTE(review): the loop body looks longer than a short branch can
	; span; presumably the assembler's branch optimisation widens it --
	; confirm.
	bra.b	.mainloop

; Set the scroll offsets of a screen and rebuild it with MakeScreen.
;  IN: a0 = screen
;      d3 = longword stored at ri_RxOffset (offset pair, x in the upper word)
;      a6 = intuition base
; INT: a1 is overwritten
.scroll:
	move.l	(sc_ViewPort+vp_RasInfo,a0),a1
	move.l	d3,(ri_RxOffset,a1)
	jmp	(_LVOMakeScreen,a6)

; Start the bounce sound.
;  IN: d3.l = position pair; the word in its upper half sets the pitch
;      a4   = _custom+aud0+ac_per
; OUT: d4.w = 64 (volume, written to the hardware by the main loop)
; INT: d0 is overwritten
.play:
	move.l	d3,d0
	swap	d0
	add.w	#50000/32,d0
	lsl.w	#5,d0		; period = (word + 50000/32) * 32
	move.w	d0,(a4)		; ac_per
	moveq	#64,d4
	rts

; Packed ball geometry, consumed MSB first by .getbits: every row starts with
; two 7-bit values, each further point is a 2-bit plus a 1-bit delta.
boingdata:
		dc.b	$b6,$38,$0,$80,$41,$84,$83,$49,$34,$9a,$4d,$2a,$93,$49,$c4,$d2,$68,$26,$90,$45,$4,$0,$d2,$74,$4,$10,$69,$26,$aa,$8e,$5b,$2c,$b6,$59,$6c,$f6,$59,$6a,$b2,$a8,$d5,$48,$34,$10,$e0,$c1,$2,$82,$4e,$38,$e5,$97,$cb,$e7,$f7,$ef,$7f,$f3,$fb,$f5,$b3,$d8,$e5,$49,$a4,$d0,$db,$8,$0,$92,$69,$c5,$6c,$f6,$7b,$f7,$f7,$ff,$f7,$fe,$ff,$bd,$f2,$f9,$6a,$a9,$34,$90,$c7,$4c,$0,$9a,$4d,$2b,$1c,$be,$5b,$bd,$fe,$7f,$ef,$de,$ff,$3d,$9e,$cb,$1c,$71,$a4,$11,$a5,$78,$0,$82,$61,$2a,$9c,$56,$3b,$2c,$b2,$f9,$6c,$b2,$d9,$65,$b1,$c5,$53,$49,$34,$2,$7b,$8c,$0,$2,$8,$a0,$93,$41,$34,$9a,$8d,$26,$95,$49,$a4,$d2,$69,$26,$12,$c,$20,$10,$51,$80,$0,$0,$0,$0,$0,$0,$0,$0,$0,$0,$0,$0,$0,$0,$0,$20,$0,$0,$0,$0

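;scroll:	(label not needed: the 6-bit packed scroll text directly follows boingdata and the bit reader simply keeps going)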
		dc.b	$46,$b0,$22,$be,$9b,$a7,$92,$5b,$6f,$2,$2e,$40,$c2,$9c,$b5,$2,$1d,$0,$9a,$9b,$ae,$a7,$3a,$0,$86,$da,$67,$84,$c,$21,$cb,$4e,$40,$49,$4,$53,$38,$d,$28,$94,$8,$a1,$b2,$c0,$29,$cc,$b,$61,$92,$50,$2f,$98,$5,$14,$40,$c,$75,$86,$4c,$c0,$a6,$e0,$11,$50,$8,$ef,$b2,$fc,$b3,$38,$9,$f2,$96,$5d,$29,$ba,$7c,$c0,$d2,$f0,$21,$8e,$39,$73,$ce,$9b,$ee,$30,$9,$25,$ae,$19,$25,$ba,$39,$4c,$3,$4c,$e9,$30,$c,$b2,$96,$29,$6c,$30,$8,$b5,$ba,$e9,$68,$0,$60,$32,$97,$3d,$0,$be,$60,$3,$86,$da,$67,$86,$6a,$6e,$38,$9,$b5,$8e,$ba,$6e,$9f,$30,$34,$bc,$b,$21,$b6,$5c,$b3,$4
; Number of 6-bit characters decoded from the packed scroll text.
SCROLLSIZE	EQU	189