4,887,235 453 454 ;Invisible-pointer traps ;If transporting was needed it has happened already ;Time= 2 cycles trapping + cycles here (defucode inviz-trap (parallel (trap-save) (assign vma memory-data) (assign b-trans-vma memory-data)) (trap-restore (memory-map read) ;Gurkh! Sometimes needed to write here????? <--- (nop))) ;Map-miss trap ;Hardware started memory reference to first level hash table in trapped ;cycle; so the data are available in the first cycle of the trap handler. ;since trapping inserted an extra clock which drove the memory pipeline. ;This is too early since we aren’t ready for it that fast. ;Time = 2 cycles trapping + 4 cycles here in most favorable case. ;It’s 4 cycles rather than 3 because Abus is a bottleneck (VMA, MD). (defucode map-miss-trap (parallel (trap-save) (assign b-map-temp (ldb vma 16. 8))) ;With address space ID? (parallel (increment-pma) (if (equal-fixnum memory-data b-map-temp) ;Match pht key? (trap-restore (write-map-from memory-data) ;Yes, and VMA still set up (map-metering)) ;Spare cycle for metering xxxxxx))) ;Well, go off and search second level ;Disk DMA task. ;The following control regstors are set up by the background ;service task, based on the command list in main memory set up ;by Lisp code. At the same time the hardware control registers ;are (all?) set up. The background service task also bashes the ;DMA task state to start it up at the right place for read or write. ;When the DMA task is done, it wakes up the background tack which ;can tell what happened by looking at the control registers. (defareg a-disk-ma 3000) ;Address of next word to transfer (defareg a-disk-wc 3001) ;Number of words to transfer (minus 3) (defareg a-disk-header 3002) ;Header value being sought (defareg a-disk-timeout 3003) ;Number of header tries before punting ; (maybe heads are positioned wrong) (defareg a-disk-search-cmd 3004);Tell hardware to search for header ;Search subroutine. Returns after reading the header of the desired sector. ;Eats shit and dies if header not found after timeout (does not return). (defucode disk-search (assign b-temp (io-bus-data disk-data)) ;---Read Lbus directly into DP?? ;---Or use extended B memory?? (if (equal-fixnum a-disk-header b-temp) (return) ;Header found. Let caller dismiss. (drop-through)) (parallel (dismiss) (assign (io-bus-data disk-control) a-disk-search-cmd)) ;Try again (parallel (assign a-disk-timeout (1- a-disk-timeout)) (if alu-carry ;Not yet counted to -1 (goto disk-search) ;Wakeup back at disk-search (eat-shit-and-die)))) ;On next wakeup, actually ;Read routine. Initially entered via gratuitous wakeup. Call search ;routine which will return with disk entering data area. ;Most wakeups are only for 2 cycles, except at the start of the wrong ;sector we remain active for 4 cycles, and at the start of the right ;sector we remain active for 5 cycles. These could each be decreased ;by 1 as noted above, and could be decreased more I guess by having ;separate search routines for read and write. (defucode disk-read (dismiss) ;Until start of sector (call disk-search) (dismiss) (jump disk-read-loop)) ;Here for each data word 4,887,235 455 456 (defucode disk-read-loop (parallel (assign pma a-disk-ma) (dma-read disk) (assign a-disk-ma (1+ a-disk-ma)) (dismiss)) (parallel (assign a-disk-wc (1- a-disk-wc)) (if alu-carry ;Not yet counted to -1 (goto disk-read-loop) ;Wake up back there (goto disk-read-last-3)))) ;Here for the third to last data word (defucode disk-read-last-3 (parallel (assign pma a-disk-ma) (dma-read disk) (assign a-disk-ma (1+ a-disk-ma)) (io-bus-stop-signal) ;Tell disk to stop reading after next word (dismiss)) (nop) (parallel ;Swallow last data word (assign pma a-disk-ma) (dma-read disk) (assign a-disk-ma (1+ a-disk-ma)) (dismiss)) (nop) ;Here we have the ECC word and the state machine has stopped ;Since we aren’t doing any double-buffered control registers hacks. ;we simply stop and let the background task look at the hardware ;reglsters and decide what to do. (parallel (awaken-task background-service-task) (dismiss)) (nop) ;XXXbrad - something missing ) ;Kernel of blting from main memory to TV ;This involves no rotation or alu function, just straight copy ;used e.g. to update a screen image. ;TV epoch corresponds to 5 microcode cycles in this version ;If a TV epoch can correspond to 6 cycles (i.e.a we use all fast ; microinstructions) things arm much easier, (defucode tv-copy-kernel (parallel (assign pma a-tv-pma) ;Uses Abus (assign memory-data b-temp1) ;Uses B,X,O busses (increment-pma)) (parallel (assign memory-data b-temp2) ;Store 2nd word in TV (memory-map read)) ;Start next memory read (assign a-tv-pma (+ a-tv-pma (b-constant 2))) ;Memory active, inc pma (parallel (assign b-temp1 memory-data) ;Stash first word from meit (increment-pma)) (parallel (assign b-temp1 memory-data) ;Uses A, X busses (increment-pma))) ;Here we have to be able to increment the VMA by 2 ;Must happen entirely in the MC because there is no ;cycle with abus free to use DP adder to increment it. ;If incremont-pma carries into the page bits of the VMA, ;this will work. ;There is some confusion about increment-pma here. Generally it ;is assumed to increment pma and vma both. But since we are ;leaving the read address in VMA, and switching PMA back and forth ;between a direct load from Abus and mapping from VMA, it's clear ;that this increment-pma really should split in several different ;memory-control functions. ;Cycle Address Bus Data Bus ;1 TV address Write data 1 ;2 Memory Address Write data 2 -- address bus conflict? -- ;3 nil nil ;4 Memory Address+1 Read data 1 ;5 nil Read data 2 ;In cycle 2 the address bus wants to be the memory address so that ;the read can get started, it also wants to be the TV address+1 ;for writing into the TV (except the TV doesn’t actually need to ;look at this anyway). ;Also Memory Address+1 needs to come out in cycle 3, not 4, ;since the memory is interleaved rather than page-mode, ;;; -*- Mode:Lisp; Package:Micro; Base:8; Lowercase:yes -*- ;;; (c) Copyright 1982, Symbolics. Inc. ; Microcode for branch instructions 4,887,235 457 458 ;Get defmicro and all h~s hosts (declare (cond ((not (status feature lmucode)) (load 'udcls)))) ;These are branches the compiler knows about initially (definst branch signed-pc-relative (set-pc (pc-add pc macro-signed-immediate))) ;This gets an offset from memory. Would it be better to get a PC? (definst long-branch constant-pc-relative (assign vma (- frame-function macro-unsigned-immediate 1)) (start-memory read) (assign b-temp pc) (parallel (check-data-type memory-data dtp-fix) (machine-version-case ((tmc tmc5) (sequential (assign a-temp memory-data) (set-pc (pc-add b-temp a-temp)))) (otherwise (set-pc (pc-add b-temp memory-data)))))) (definst branch-false signed-pc-relative (if (data-type? top-of-stack-a dtp-nil) (set-pc (pc-add pc macro-signed-immediate) (for-effect (popval))) (parallel (for-effect (popval)) (next-instruction)))) (definst branch-true signed-pc-relative (if (not (data-type? top-of-stack-a dtp-nil)) (set-pc (pc-add pc macro-signed-immediate) (for-effect (popval))) (parallel (for-effect (popval)) (next-instruction)))) (definst branch-false-else-pop signed-pc-relative (if (data-type? top-of-stack-a dtp-nil) (goto branch) (parallel (for-effect (popval)) (next-instruction)))) (definst branch-true-else-pop signed-pc-relative (if (not (data-type? top-of-stack-a dtp-nil)) (goto branch) (parallel (for-effect (popval)) (next-instruction)))) (definst branch-false-and-pop signed-pc-relative (if (data-type? top-of-stack-a dtp-nil) (set-pc (pc-add pc macro-signed-immediate) (for-effect (popval))) (next-instruction))) (definst branch-true-and-pop signed-pc-relative (if (not (data-type? top-of-stack-a dtp-nil)) (set-pc (pc-add pc macro-signed-immediate) (for-effect (popval))) (next-instruction))) ;This is a random selection of other branches (comment ;The compiler doesn’t want to use these yet ;Note: can’t test zero simultaneous with popval due to xbus conflict ;Okay since instruction has to take two cycles even if it doesn’t branch (definst branch-zerop (signed-pc-relative needs-stack) (parallel (check-fixnum-1arg-b top-of-stack (otherwise (signal-error unimplemented-arithmetic))) ;--- (if (zero-fixnum top-of-stack) (set-pc (pc-add pc macro-signed-immediate) (for-effect (popval))) (parallel (for-effect (popval)) (next-instruction))))) (definst branch-not-zerop (signed-pc-relative needs-stack) (parallel (check-fixnum-1arg-b top-of-stack (otherwise (signal-error unimplemented-arithmetic))) :--- (if (not-zero-fixnum top-of-stack) (set-pc (pc-add pc macro-signed-immediate) (for-effect (popval))) (parallel (for-effect (popval)) (next-instruction))))) 4,887,235 459 460 (definst branch-greater-or-equal (signed-pc-relative needs-stack) (parallel (check-fixnum-2args next-on-stack top-of-stack (otherwise (signal-error unimplemented-arithmetic))) ;--- (decrement-stack-pointer) (if (greater-or-equal-fixnum next-on-stack top-of-stack) (set-pc (pc-add pc macro-signed-immediate) (for-effect (popval))) (parallel (for-effect (popval)) (next-instruction))))) (definst branch-eq (signed-pc-relative needs-stack) (parallel (decrement-stack-pointer) (if (equal-typed-pointer next-on-stack top-of-stack) (set-pc (pc-add pc macro-signed-immediate) (for-effect (popval))) (parallel (for-effect (popval)) (next-instruction))))) (definst branch-not-eq (signed-pc-relative needs-stack) (parallel (decrement-stack-pointer) (if (not-equal-typed-pointer next-on-stack top-of-stack) (set-pc (pc-add pc macro-signed-immediate) (for-effect (popval))) (parallel (for-effect (popval)) (next-instruction))))) );end comment F:>lmach>ucode>bitblt-block-mode.lisp.1 ; -*- Mode:Lisp; Package:Micro; Base:8: Lowercase:yes -*- ;;;; BITBLT microcode for 3600 (defmicro waiting-for-memory () ;documentation only, I guess. `(nop)) (defmicro abus-array-data (&body body) (parallel (check-data-type memory-data dtp-fix) ;this traps forwarding pointers, right? ,@body)) (defmicro assign-vma-offset (which &rest stuff) (selectq which (S `(assign vma (+ bb-s-row-addr bb-s-offset ,@stuff))) (D `(assign vma (+ bb-d-data-addr bb-d-offset ,@stuff))) (otherwise (ferror () "assign-vma-offset knows about only S and D, not ~d" which)))) (defmicro parallel-with-s-access (offset &body body) (make-memory-access 'bb-s-row-addr 'bb-s-offset offset body)) (defmicro parallel-with-d-access (offset &body body) (make-memory-access 'bb-d-data-addr 'bb-d-offset offset body)) (eval-when (eval compile load) (defun make-memory-access (baseaddr offset-sym offset body) (if (or (eq offset offset-sym) (equal offset `(1+ ,offset-sym))) () (ferror () "~s is not a recognized offset for ~s" offset offset-sym)) (let* ((body (reverse body)) (finally `(abus-array-data ,(car body)))) (do ((ll (reverse `((assign vma ,(if (atom offset) `(+ ,baseaddr ,offset) `(+ ,baseaddr ,(second offset) 1))) (start-memory read) (waiting-for-memory))) (cdr ll)) (body (cdr body) (cdr body)) (l)) ((and (null ll) (null body)) `(sequential ,@l ,finally)) (cond ((null ll) (push (car body) l)) ((null body) (push (car ll) l)) (T (push `(parallel ,(car ll) ,(car body)) l)))))) 4,887,235 461 462 );eval-when ;;---hair these up appropriately (defmicro 32- (operand) `(- (b-constant 32.) ,operand)) (defmicro 31- (operand) `(- (b-constant 31.) ,operand)) (defmicro dispatch-after-this (operand this &body clauses) `(sequential (dispatch-after-next ,operand ,@clauses) (parallel (take-dispatch) ,this))) (defmicro dispatch-after-gen (dispatching-on var-and-indices-and-bod &rest clauses) (let* ((var-and-indices (first var-and-indices-and-bod)) (bod (second var-and-indices-and-bod)) (var (first var-and-indices)) (indices (rest1 var-and-indices))) `(dispatch-after-next ,dispatching-on ,@(loop for index in indices collect `((,index) ,(progv (list var) (list index) (eval bod)))) ,@clauses))) (defmicro incr-d-offset () `(assign bb-d-offset (1+ bb-d-offset))) (defmicro decr-d-offset () `(assign bb-d-offset (1- bb-d-offset))) (defmicro incr-wrap-s-offset () `(sequential (assign bb-s-offset (1+ bb-s-offset)) (if (greater-or-equal-fixnum bb-s-offset bb-s-row-length) (parallel (lisp (format T "~&>>>Wrapping around on bb-a-offset from ~d." (low32 (tr 'bb-b-offset)))) (assign bb-s-offset (b-constant 0))) (drop-through)))) (defmicro decr-wrap-s-offset () `(parallel (assign bb-s-offset (1- bb-s-offset)) (if (minus-fixnum obus) (parallel (lisp (cerror T () () ">>>Decr wrapping around on bb-s-offset")) (assign bb-s-offset (1- bb-e-row-length))) (drop-through)))) (defmicro store-word (datum) `(store-contents (set-type ,datum dtp-fix) () T)) (defmicro parallel-with-return (&body stm) `(,(if (eq *machine-version* `sim) `sequential `parallel) ,@stm (return))) ;;This is incompatible with modularity (defmacro reserve-bitblt-scratchpad-memory (a-start b-start &rest stuff) (loop with a-loc = a-start and b-loc = b-start for (name side) in stuff when (eq side 'a) collect `(defareg-at-loc ,name ,a-loc 0) into forms and do (incf a-loc) when (eq side 'b) collect `(defbreg-at-loc ,name ,b-boc 0) into forms and do (incf b-loc) finally (return `(progn 'compile (reserve-scratchpad-memory ,a-start ,(1- a-loc) ,b-start ,(1- b-loc)) ,@forms)))) (defvar *dp-offset-names* ()) (defmacro def-fp-offsets (&rest names) (loop for i upfrom 0 4,887,235 463 464 for name in names append `((defatomicro ,name (amem (frame-pointer ,i))) (remprop ',name 'defareg-at-loc) (remprop ',name 'defbreg-at-loc) (defprop ,name ,i fp-offset) (or (memq ',name *fp-offset-names*) (push ',name *fp-offset-names*))) into foo finally (return '(progn 'compile ,@foo)))) ;;decode fp offset numbers into symbols. Debugging only. (defun dfp (&rest numbers) (loop for number in numbers collect (loop for name in *fp-offset-names* when (equal (get name 'fp-offset) number) return name finally (return number)))) (def-fp-offsets bb-arg-alu bb-arg-width bb-arg-height ;lisp arg bb-arg-from-array bb-arg-from-x bb-arg-from-y ;lisp arg bb-arg-to-array bb-arg-to-x bb-arg-to-y ;lisp arg bb-width-a ;ucode arg bb-s-data-addr ;ucode arg bb-s-offset-a ;ucode erg bb-s-row-offset ;ucode arg bb-s-bitpos ;ucode erg bb-s-row-length ;ucode erg bb-d-data-addr ;ucode erg bb-d-offset-a ;ucode erg bb-d-bitpos ;ucode erg bb-event-count ;ucode erg bb-alu-operation ;ucode arg ) ;;; Some temporaries, (reserve-bitblt-scratchpad-memory 2650 372 (bb-width b) ;copied from arg on A side (bb-s-offset b) ;.. (bb-d-offset b) ;.. (bb-constant b) ;.. (bb-s-word b) ;temp (a-temp-3 a) ;temp (bb-constant-a a) ;temp (bb-identity a) ;temp (bb-s-word2 a) ;temp (bb-s-row-addr a) ;temp ) (defmicro read-bb-s-word () `(parallel (assign a-temp (+ bb-width bb-s-bitpos)) (call read-bb-s-word1))) ;;Assumptions about setup: ;;bb-constant has: ;; >> for constant operations (0,-1): the constant; ;; >> for operations dependent only on source or destination (x, ~x, y, ~y): ;; a 0 for x,y or -1 for ~x,~y; ;; >> for operations dependent or; both s and d: 0 for those using source directly, ;; and -1 for those that want the source compementcd. (defucode read-bb-s-word1 (assign-vma-offset s) (parallel (assign byte-r (32- bb-s-bitpos)) (start-memory read)) (parallel (waiting-for-memory) (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;; source is entirely within one word (parallel-with-return (abus-array-data (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))) ;;source is split across two words (sequential (abus-array-data (assign bb-s-word (rotate memory-data byte-r))) (assign-vma-offset s 1) (parallel (start-memory read) ;byte-r is already Ok ) (parallel (waiting-for-memory) (assign byte-s (1- a-temp))) (abus-array-data (assign bb-s-word (dpb memory-data byte-s byte-r bb-s-word))) (parallel-with-return (assign bb-s-word (logxor bb-s-word bb-constant-a))))))) 4,887,235 465 466 (defucode bb-copy-stuff-to-b-side (assign b-temp bb-s-row-offset) (assign bb-s-row-addr (+ bb-s-data-addr b-temp)) (assign bb-s-offset bb-s-offset-a) (parallel (assign bb-d-offset bb-d-offset-a) (return))) (defmacro defucode-bitblt (name source destination neither both) `(defucode ,name (parallel (assign bb-width bb-width-a) (call bb-copy-stuff-to-b-side)) (dispatch-after-this (ldb bb-alu-operation 4 0) (parallel (assign bb-constant (a-constant 0)) ;assumption, for the (assign bb-constant-a (a-constant 0))) ;common case ((0) ;0 (goto ,neither)) ((1) ;x*y (parallel (assign bb-identity (a-constant -1)) (jump ,both))) ((2) ;~x*y (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((3) (return)) ;y ((4) ;x*~y (parallel (assign bb-identity (a-constant -1)) (jump ,both))) ((5) (goto ,source)) ;x ((6) ;x xor y (parallel (assign bb-identity (a-constant 0)) (jump ,both))) ((7) ;x+y (parallel (assign bb-identity (a-constant 0)) (jump ,both))) ((8) ;~x*~y (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((9) ;~x xor y (assign bb-identity (a-constant 0)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((10.) ;~x (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((11.) ;~x+y (assign bb-identity (a-constant 0)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((12.) ;~y (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,destination))) ((13.) ;x+~y actually, ~(~x*y) (assign bb-identity (a-constant -1)) (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,both))) ((14.) ;~x+~y actually, ~(x*y) (parallel (assign bb-identity (a-constant -1)) (jump ,both))) ((15.) ;-1 (parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1)) (jump ,neither)))))) (defucode-bitblt ubitblt-short-row ubitblt-short-row-source ubitblt-short-row-destination ubitblt-short-row-neither ubitblt-short-row-both) (defucode-bitblt ubitblt-long-row ubitblt-long-row-source ubitblt-long-row-destination ubitblt-long-row-neither ubitblt-long-row-both) (defucode-bitblt ubitblt-long-row-backwards ubitblt-long-row-source-backwards ubitblt-long-row-destination ubitblt-long-row-neither ;direction immaterial ubitblt-long-row-both-backwards) ;;; These should eventually be folded back into defucode-bitblt (definst %bitblt-short-row no-operand (jump ubitblt-short-row)) (definst %bitblt-long-row no-operand (jump ubitblt-long-row)) (definst %bitblt-long-row-backwards no-operand (jump ubitblt-long-row-backwards)) (definst %bitblt-decode-arrays no-operand 4,887,235 467 468 (jump ubitblt-decode-arrays)) (defucode ubitblt-short-row-source (read-bb-s-word) (assign a-temp (+ bb-width bb-d-bitpos)) (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;;destination is entirely within one word (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r bb-d-bitpos) (parallel-with-return (store-word (dpb bb-s-word byte-s byte-r memory-data)))) ;;destination is split across two words (sequential ;;store the low byte (parallel-with-d-access bb-d-offset (assign byte-s (31- bb-d-bitpos)) (assign byte-r bb-d-bitpos) (store-word (dpb bb-s-word byte-s byte-r memory-data))) ;; store the high byte, using do into md as background (parallel-with-d-access (1+ bb-d-offset) (assign byte-s (1- a-temp)) (assign byte-r bb-d-bitpos) ;;byte-r is ok (parallel-with-return (store-word (ldb bb-s-word byte-s byte-r memory-data)) ))))) (defucode ubitblt-short-row-destination (assign a-temp (+ bb-width bb-d-bitpos)) (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;;destination is entirely within one word (sequential (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r bb-d-bitpos) (assign a-temp-2 memory-data)) (assign b-temp (dpb bb-constant byte-s byte-r (a-constant 0))) (parallel-with-return ;;XXXbrad b-temp? (store-word (logxor b-temp a-temp-2)))) ;;destination is split across two words (sequential ;;munge the low byte (parallel-with-d-access bb-d-offset (assign byte-s (31- bb-d-bitpos)) (assign byte-r bb-d-bitpos) (assign a-temp-2 memory-data)) (assign b-temp (dpb bb-constant byte-s byte-r (a-constant 0))) (store-word (logxor b-temp a-temp-2)) ;;munge the hign byte (parallel-with-d-access (1+ bb-d-offset) (assign byte-s (1- a-temp)) (assign byte-r (a-constant 0)) (assign a-temp-2 memory-data)) (assign b-temp (ldb bb-constant byte-s byte-r)) (parallel-with-return (store-word (logxor b-temp a-temp-2)))))) ;;:the alu operation is actually a constant (defucode ubitblt-short-row-neither (assign a-temp (+ bb-width bb-d-bitpos)) (parallel (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;; destination is entirely within one wore (parallel-with-d-access bb-d-offset (assign byte-s (1- bb-width)) (assign byte-r bb-d-bitpos) (parallel (store-word (dpb bb-constant byte-s byte-r memory-data)) (return))) ;;destination is split across two words (sequential ;;store the low bute (parallel-with-d-access bb-d-offset (assign byte-s (31- bb-d-bitpos)) (assign byte-r bb-d-bitpos) (store-word (dpb bb-constant byte-s byte-r memory-data))) ;;store the high byte (parallel-with-d-access (1+ bb-d-offset) (assign byte-s (1- a-temp)) (assign byte-r (b-constant 0)) (parallel (store-word (dpb bb-constant byte-s byte-r memory-data)) (return))))))) 4,887,235 469 470 ;;the alu operation depends upon both source and destination bits (defucode ubitblt-short-row-both (read-bb-s-word) (assign a-temp (+ bb-width bb-d-bitpos)) (assign-vma-offset d) (if (lesser-or-equal-fixnum a-temp (b-constant 32.)) ;;destination is entirely within one word (sequential (assign byte-s (1- bb-width)) (parallel (assign byte-r bb-d-bitpos) (jump bb-byte-alu-operation-dispatch))) ;jcall ;;destination is split across two words (sequential ;;store the low byte (assign byte-s (31- bb-d-bitpos)) (parallel (assign byte-r bb-d-bitpos) (call bb-byte-alu-operation-dispatch)) ;;store the high byte (assign bb-s-word (rotate bb-s-word byte-r)) (assign byte-s (1- a-temp)) (assign-vma-offset d 1) (parallel (assign byte-r (b-constant 0)) (jump bb-byte-alu-operation-dispatch))))) ;jcall ;(boole fn x y ...) if fn is "abcd" then ; y 0 1 2 3 4 5 6 7 ; | 0 1 0 x*y ~x*y y x*~y x x#y x+y ; ---------- ; 0 | a c 8 9 10 11 12 13 14 15 ; x | ~(x+y) ~(x#y) ~x ~x+y ~y x+~y ~x+~y -1 ; 1 | b d ;;vma and byte regs have been set up already, for DPB. ;;trashes b-temp, a-temp-2, b-temp-2, but not a-temp. (defucode bb-byte-alu-operation-dispatch (dispatch-after-this (parallel (start-memory read) (ldb bb-alu-operation 4 0)) (parallel (assign b-temp (dpb bb-s-word byte-s byte-r bb-identity)) (waiting-for-memory)) ((1 2) ;;1 x*y logand ;;2 ~x*y logand (parallel-with-return (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logand memory-data b-temp)))))) ((4 8.) ;;4 ~(~x+y) = x*~y andc2 ;;8 ~(x+y) = ~x*~y andcb (parallel (declare-memory-timing data-cycle) (abus-array-data (assign a-temp-2 memory-data))) (assign b-temp-2 (dpb (b-constant -1) byte-s byte-r 0)) ;can’t merge this... (assign a-temp-2 (logxor a-temp-2 b-temp-2)) ;...with this. (parallel-with-return (store-word (logand a-temp-2 b-temp)))) ((6 9.) ;;6 x#y logxor ;;9 ~(x#y)=~x#y logxor (parallel-with-return (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logxor b-temp memory-data)))))) ((7 11.) ;;7 x+y logior ;;11 ~x+y logior (parallel-with-return (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logior b-temp memory-data)))))) ((13. 14.) ;;13 x+~y = ~(~x*y) lognand ;;14 ~x+~y=~(x*y) (parallel (declare-memory-timing data-cycle) (abus-array-data (assign a-temp-2 (logand b-temp memory-data)))) (parallel-with-return (store-word (logxor (dpb (b-constant -1) byte-s byte-r 8) a-temp-2)))) (otherwise (goto cant-happen)))) ;;vma has been set up already (defucode bb-word-alu-operation-dispatch ;commonly 3 cycles (plus 1 for the call) (dispatch-after-this (parallel (start-memory read) (ldb bb-alu-operation 4 0)) (waiting-for-memory) ;---want to use this somehow... ((1 2) ;;1 x*y logand ;;2 ~x*y logand (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logand bb-s-word memory-data))) (return))) ((4 8.) ;;4 x*~y andcb ;;8 ~(x+y) ~-x*~y andcb (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (andc2 bb-s-word memory-data))) (return))) 4,887,235 471 472 ((6 9.) ;;6 x#y logxor ;;9 ~(x#y)=~x#y logxor (parallel (declare-memory-timing data-cycle) (abus-array-data (store-word (logxor bb-s-word memory-data))) (return))) ((7 11.) ;;7 x+y logior ;;11 ~x+y logior (parallel (declare-memory=timing data-cycle) (abus-array-data (store-word (logior bb-s-word memory-data))) (return))) ((13. 14.) ;;13 x+~y = ~(~x*y) ;;14 ~x+~y=~(x*y) (parallel (declare-memory-timing data-cycle) (abus-array-data (assign a-temp-2 (logand bb-s-word memory-data)))) (parallel (store-word (logxor (b-constant -1) a-temp-2)) (return))) (otherwise (goto cant-happen)))) ;;alu depends only on source bits (defucode ubitblt-long-row-source (parallel (assign b-temp bb-d-bitpos) (if (zero-fixnum bb-d-bitpos) (if (zero-fixnum bb-s-bitpos) (parallel (assign bb-s-offset (1- bb-s-offset)) ;bb-aligned-row-source will increment first (jump (trace-path #/a)) (jump ubitblt-aligned-row-source)) ;; SSSSSSSSSSSSSSSSSSSSSSSSSsssssss ;; dddddddddddddddodddddddddddddddd (parallel-with-s-access bb-s-offset (assign byte-r (32- bb-s-bitpos)) (parallel (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))) (lisp (trace-path #/c)) (jump ubitblt-d-aligned-row-source)))) (if (equal-fixnum b-temp bb-s-bitpos) ;;SSSSSSSSSSSSSSSSSSSSSSSSSsssssss ;;DDDDDDDDDDDDDDDDDDDDDDDDDddddddd (sequential (parallel-with-s-access bb-s-offset (assign a-temp (32- bb-d-bitpos)) (assign byte-r a-temp) (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))) (parallel-with-d-access bb-d-offset (assign byte-r bb-d-bitpos) (assign byte-s (1- a-temp)) (store-word (dpb bb-s-word byte-s byte-r memory-data))) (incr-d-offset) (parallel (assign bb-width (- bb-width a-temp)) (lisp (trace-path #/b)) (jump ubitblt-aligned-row-source))) (if (lesser-fixnum bb-s-bitpos b-temp) ;;sssssssssSSSSSSSSSSSSSSSS....... ;; DDOODDDDDDDDDODDdddddddddddddddd ;; <- 32-d.bitpos-> (sequential (parallel-with-s-access bb-s-offset (assign byte-r (32- bb-s-bitpos)) (parallel (assign b-temp (32- bb-d-bitpos)) (assign a-temp obus)) (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))) ;;.......sssssssssSSSSSSSSSSSSSSSS (parallel-with-d-access bb-d-offset (assign byte-r bb-d-bitpos) (assign byte-s (1- b-temp)) (store-word (dpb bb-s-word byte-s byte-r memory-data))) (incr-d-offset) ;;rotate s-word further to right by 32-d.bitpos ;;SSSSSSSSSSSSSSSS.......sssssssss (assign byte-r bb-d-bitpos) ;or left by -(32-d.bitpos) (assign bb-s-word (rotate bb-s-word byte-r)) (assign bb-width (- bb-width a-temp)) (parallel (assign bb-s-bitpos (+ bb-s-bitpos b-temp)) (lisp (trace-path #/d)) (jump ubitblt-d-aligned-row-source))) (sequential ;;The high part of the first source word is not as long as the high part of the ;;first destination word. So extract the useful part of the first source word, ;;and deposit into it as much of the second source word as needed to fill out the rest ;;of the first destination word. Then position the rest of the second source word ;;appropriately for the inner loop.