// http://neil.franklin.ch/Projects/PDP-10/pdp10.java
//   - PDP-10 clone microprocessor, main (and presently only) source file
// author Neil Franklin, last modification 2002.06.25


// ------ prerequisites section

// to really understand this code you should
//   have an understanding of the basics of
//     how a computer/processor executes machine code
//     how to design logic circuits, such as with [74|74LS|74F]xx(x) chips
//     the basics of how programmable logic works, particularly FPGAs
//   have read the specifics from the
//     PDP-10 reference manual http://www.36bit.org/dec/manual/ad-h391a-t1.pdf
//     Xilinx Virtex data sheet http://www.xilinx.com/partinfo/ds003.pdf
//     Xilinx JBits docs (with the software, to get both mail jbits@xilinx.com)

// for information on what has been done, is being done, or is to be done
//   refer to the log file at http://neil.franklin.ch/Projects/PDP-10/Logfile


// ------ Java import section

import java.lang.*;
import java.io.*;
import java.util.*;
import java.text.*;

import com.xilinx.JBits.Virtex.Bitstream;
import com.xilinx.JBits.Virtex.JBits;
import com.xilinx.JBits.Virtex.Devices;
import com.xilinx.JBits.Virtex.ConfigurationException;

import com.xilinx.JBits.Virtex.Bits.LUT;
import com.xilinx.JBits.Virtex.Bits.S0Control;
import com.xilinx.JBits.Virtex.Bits.S1Control;
import com.xilinx.JBits.Virtex.Bits.S0Clk;
import com.xilinx.JBits.Virtex.Bits.S1Clk;
import com.xilinx.JBits.Virtex.Bits.S0RAM;
import com.xilinx.JBits.Virtex.Bits.S1RAM;

import com.xilinx.JBits.Virtex.Util;

import com.xilinx.JBits.CoreTemplate.Pin;
import com.xilinx.JRoute2.Virtex.JRoute;
import com.xilinx.JRoute2.Virtex.ResourceDB.CenterWires;
import com.xilinx.JRoute2.Virtex.RouteException;


// ------ JBits startup/shutdown overhead section

public class pdp10 {

  // files we want to generate
  //   OutFileName is the bitstream written at the end of main()
  //   ListFileName is the listing that lut() adds one line per LUT to
  final public static String OutFileName = "pdp10.bit";
  final public static String ListFileName = "pdp10.lst";
  // writer for the listfile, opened and closed in main(), written to by lut()
  public static PrintWriter ListFile;

  // define specifics of what chip we are using
  //   DeviceNullbitfile is appended to the bitstream directory in main()
  //   DeviceMaxRow/DeviceMaxCol are the positions lastlut()/lastsli() jump to
  final public static String DeviceName = "XCV300";
  final public static String DeviceNullbitfile = "XCV300/null300.bit";
  final public static int DeviceMaxRow = 31, DeviceMaxCol = 47;

  // these are accessed in multiple functions
  //   both are created in main() before pdp10() is called
  public static JBits Fpga;
  public static JRoute Router;

  // program entry point, generates the PDP-10 bitstream
  //   checks the device type, reads the null bitstream, opens the listfile,
  //   calls pdp10() to modify the bitstream, then writes the result
  // args is not used
  // every failure is fatal, continuing without the null bitstream or without
  //   the listfile would only give an useless bitstream or an later crash
  // exit codes: -2 unknown device, -3 unsupported device,
  //   -4 null bitstream not read, -5 listfile not opened,
  //   -6 bitstream not written
  public static void main(String args[]) {

    // set up what we are going to do
    String InFileName = "/usr/local/JBits/data/Bitstream/" + DeviceNullbitfile;

    // get the device type
    int DeviceType = Devices.getDeviceType(DeviceName);
    if (DeviceType == Devices.UNKNOWN_DEVICE) {
      System.out.println("Did not recognize device type " + DeviceName);
      System.exit(-2); }
    // test if the device is supported
    if (Devices.isSupported(DeviceType) == false) {
      System.out.println("Unsupported device type. Exiting");
      System.exit(-3); }

    // initialise device storage
    Fpga = new JBits(DeviceType);

    // and make using the router possible
    Router = new JRoute(Fpga);

    // read the null bitstream, without it there is nothing to modify
    System.out.println("reading in " + InFileName + " ...");
    try {
      Fpga.read(InFileName); }
    catch (FileNotFoundException Fe) {
      System.out.println("File " + InFileName + " not found");
      System.out.println(Fe);
      System.exit(-4); }
    catch (IOException Io) {
      System.out.println("IO exception reading bitstream file");
      System.out.println(Io);
      System.exit(-4); }
    catch (ConfigurationException Ce) {
      System.out.println("Configuration exception while reading bitstream");
      System.out.println(Ce);
      System.exit(-4); }

    // open listfile, lut() writes to it so it must exist before pdp10()
    System.out.println("opening listfile " + ListFileName + " ...");
    try {
      ListFile = new PrintWriter(
        new FileOutputStream(ListFileName)); }
    catch (FileNotFoundException Fe) {
      System.out.println("File " + ListFileName + " not opened");
      System.out.println(Fe);
      System.exit(-5); }

    // modify the bitstream
    System.out.println("modifying bits ...");
    try {
      pdp10(); }
    finally {
      // close listfile, also when generating the design failed half way
      ListFile.close(); }

    // PrintWriter never throws on write errors, so ask for them explicitly
    if (ListFile.checkError()) {
      System.out.println("Error writing listfile " + ListFileName); }

    // write the bitstream
    System.out.println("writing out " + OutFileName + " ...");
    try {
      Fpga.write(OutFileName); }
    catch (IOException Io) {
      System.out.println("IO exception writing bitstream file");
      System.out.println(Io);
      System.exit(-6); } }


// ------ auxilary code sections from here on

  // all these "auxilary code for xxx section" sections are not PDP-10 specific
  //   they are just code to abstract from the JBits library
  //   comfort stuff to simplify the actual PDP-10 code
  // they really belong in some form of include file or library, to hide them
  //   but Java does not offer either of these
  // the nearest thing would be a class, requiring instantiation here
  //   and an object name on every function and constant, which would destroy
  //   part of the gains
  // so they have been included here, *** you best jump over them ***
  // for the actual PDP-10 design implementation start reading at the line
  //   "// ------ actual PDP-10 design implementation sections from here on"


// ------ auxilary code for placing algorithm section

  // start allocation of columns/slices/rows/LUTs for placing of logic
  //   each section reserves space by incrementing, protecting used resources

  // placing starts at bottom/left point of FPGA
  //   Col: 0 is left and growing right, as going from left rightwards start 0
  //   Sli: 1 is left and 0 is right, as going from left rightwards start 1
  //   Row: 0 is bottom and growing up, as going from bottom up start 0
  //   Lut: 0 is F and 1 is G, as going from bottom up start 0

  // variables for present placing position
  //   start values are the bottom/left corner: column 0, slice 1, row 0, F LUT
  //   changed only by the stepping functions, read by pos() and all the
  //   config bit, pin and LUT setting routines
  public static int Col = 0, Sli = 1, Row = 0, Lut = 0;


  // LUT (Lut and Row) placing position stepping functions

  // step to the next LUT upwards
  //   from F goes to G in the same row, from G goes to F in the next row
  public static void nextlut() {
    Lut = 1 - Lut;
    if (Lut != 0) {
      return;
    }
    // wrapped around from G to F, so we are now in the row above
    Row++;
  }

  // make sure the present position is an F LUT
  //   stays where it is when already on F, else steps on to the next F
  public static void alignlutf() {
    boolean OnFLut = (Lut == 0);
    if (!OnFLut) {
      nextlut();
    }
  }

  // select the second (G) LUT, row and slice stay as they are
  public static void secondlut() {
    Lut = 1;
  }

  // jump to the last/top LUT of the present slice
  //   that is the G LUT in row DeviceMaxRow
  public static void lastlut() {
    Lut = 1;
    Row = DeviceMaxRow;
  }


  // slice (Sli and Col) placing position stepping functions

  // step to the next slice rightwards and restart at its bottom F LUT
  //   from slice 1 goes to slice 0 same column, from 0 to 1 of next column
  public static void nextsli() {
    Sli = 1 - Sli;
    if (Sli == 1) {
      // wrapped around from slice 0 to 1, so we are now one column further
      Col++;
    }
    // restart row/LUT allocation in the new slice
    Lut = 0;
    Row = 0;
  }

  // make sure the present position is in an slice 1
  //   leaves everything untouched when already there
  //   else steps on with nextsli(), which also restarts row/LUT allocation
  public static void alignsli1() {
    if (Sli == 1) {
      return;
    }
    nextsli();
  }

  // select the second slice (slice 0), column and row stay as they are
  public static void secondsli() {
    // the second slice is number 0
    Sli = 0;
    // and undo an earlier secondlut(), start on the F LUT again
    Lut = 0;
  }

  // jump to the last/rightmost slice of the FPGA, at its bottom F LUT
  //   that is slice 0 in column DeviceMaxCol
  public static void lastsli() {
    Col = DeviceMaxCol;
    Sli = 0;
    // restart row/LUT allocation in that slice
    Row = 0;
    Lut = 0;
  }


  // zigzag placing functions for data paths using LUT or slice pairs

  // zigzag stepping between an pair of slices
  //   zig true:  "zig", go to next slice, same row
  //   zig false: "zag", go back to first slice, next row
  // in both cases restarts on the F LUT
  public static void nextzigzagsli(boolean zig) {
    // undo an earlier secondlut() before changing slice
    Lut = 0;
    // both directions flip the slice, they differ in column and row handling
    Sli = 1 - Sli;
    if (zig) {
      // arriving on slice 1 means having crossed into the next column
      if (Sli == 1) {
        Col++;
      }
    } else {
      // arriving on slice 0 means having crossed back into previous column
      if (Sli == 0) {
        Col--;
      }
      Row++;
    }
  }

  // zigzag stepping between an pair of columns (CLBs)
  //   zig true:  "zig", go to next column, same row
  //   zig false: "zag", go back to previous column, next row
  // in both cases restarts on slice 1, F LUT
  public static void nextzigzagclb(boolean zig) {
    // undo an earlier secondsli() and secondlut()
    Lut = 0;
    Sli = 1;
    if (zig) {
      Col++;
      return;
    }
    Col--;
    Row++;
  }


  // return formatted string showing present position in format cccS/rrrL
  // format the present placing position as text, format cccS/rrrL
  //   ccc = Col and rrr = Row, each the last 3 chars of "000" + number
  //   S = L for slice 1, R for any other slice
  //   L = F for LUT 0, G for any other LUT
  public static String pos() {
    // front-fill with zeros, then keep only the last 3 characters
    String ColText = "000" + Col;
    String RowText = "000" + Row;
    String ColField = ColText.substring(ColText.length() - 3);
    String RowField = RowText.substring(RowText.length() - 3);
    // slice and LUT numbers become letters
    char SliLetter = (Sli != 1) ? 'R' : 'L';
    char LutLetter = (Lut != 0) ? 'G' : 'F';
    // merge all to X/Y format position string
    return ColField + SliLetter + "/" + RowField + LutLetter;
  }


// ------ auxilary code for 0/1 slice and F/G LUT independant constants section

  // constants that allow writing code that can set any of the 4 LUTs in a CLB
  //   without being 0/1 slice or F/G LUT position dependent
  // this gets rid of ifs and of duplicated code that differs only in the
  //   set of constants used in it

  // this is the worst to read of all the auxiliary code sections
  // ** you do NOT need to understand this section to comprehend the design **


  // config bit constants for Fpga.set(Row, Col, what[Sli][Lut], to[Sli][Lut]);
  //   and for Fpga.set(Row, Col, what[Sli], to[Sli]);
  //   always first the constant for what to set,
  //   then the constants (named what_value) for what to set it to

  // mode settings for LUTs, some of them are only slice dependent
  //   slice level ones are indexed [Sli] and fit the parameters of slice()
  //   LUT level ones are indexed [Sli][Lut] and fit the parameters of lcell()
  // LutModeLut is slice level, with its _Off/_On values
  final public static int LutModeLut[][][] =
    { S0RAM.LUT_MODE, S1RAM.LUT_MODE };
  final public static int LutModeLut_Off[][] =
    { S0RAM.OFF, S1RAM.OFF };
  final public static int LutModeLut_On[][] =
    { S0RAM.ON, S1RAM.ON };
  // LutModeRam is LUT level, first row is slice 0 F/G, second is slice 1 F/G
  final public static int LutModeRam[][][][] =
    { { S0RAM.F_LUT_RAM, S0RAM.G_LUT_RAM },
      { S1RAM.F_LUT_RAM, S1RAM.G_LUT_RAM } };
  // values to set LutModeRam to, LUT level, so indexed [Sli][Lut]
  //   first row is slice 0 (F, G), second row is slice 1 (F, G)
  //   each row must hold only the constants of its own slice
  //   (the rows used to mix S0RAM and S1RAM, giving slice 0 G LUT an S1RAM
  //   value and slice 1 F LUT an S0RAM value)
  final public static int LutModeRam_Off[][][] =
    { { S0RAM.OFF, S0RAM.OFF },
      { S1RAM.OFF, S1RAM.OFF } };
  final public static int LutModeRam_On[][][] =
    { { S0RAM.ON, S0RAM.ON },
      { S1RAM.ON, S1RAM.ON } };
  // slice level mode setting, indexed [Sli], with its _Off/_On values
  //   presumably combines both LUTs of an slice to one 32x1 RAM,
  //   going by the JBits constant name - confirm in the JBits docs
  final public static int LutModeRam32[][][] =
    { S0RAM.RAM_32_X_1, S1RAM.RAM_32_X_1 };
  final public static int LutModeRam32_Off[][] =
    { S0RAM.OFF, S1RAM.OFF };
  final public static int LutModeRam32_On[][] =
    { S0RAM.ON, S1RAM.ON };
  // slice level mode setting, indexed [Sli], with its _Off/_On values
  final public static int LutModeLutRamDualRam32[][][] =
    { S0RAM.DUAL_MODE, S1RAM.DUAL_MODE };
  final public static int LutModeLutRamDualRam32_Off[][] =
    { S0RAM.OFF, S1RAM.OFF };
  final public static int LutModeLutRamDualRam32_On[][] =
    { S0RAM.ON, S1RAM.ON };
  // LUT level mode setting, indexed [Sli][Lut]
  //   first row is slice 0 F/G, second row is slice 1 F/G
  final public static int LutModeShift[][][][] =
    { { S0RAM.F_LUT_SHIFTER, S0RAM.G_LUT_SHIFTER },
      { S1RAM.F_LUT_SHIFTER, S1RAM.G_LUT_SHIFTER } };
  // values to set LutModeShift to, LUT level, so indexed [Sli][Lut]
  //   first row is slice 0 (F, G), second row is slice 1 (F, G)
  //   each row must hold only the constants of its own slice
  //   (the rows used to mix S0RAM and S1RAM, giving slice 0 G LUT an S1RAM
  //   value and slice 1 F LUT an S0RAM value)
  final public static int LutModeShift_Off[][][] =
    { { S0RAM.OFF, S0RAM.OFF },
      { S1RAM.OFF, S1RAM.OFF } };
  final public static int LutModeShift_On[][][] =
    { { S0RAM.ON, S0RAM.ON },
      { S1RAM.ON, S1RAM.ON } };

  // enable carry generation
  //   LUT level, indexed [Sli][Lut], X constants for F and Y constants for G
  final public static int CarryEnable[][][][] =
    { { S0Control.XCarrySelect.XCarrySelect,
        S0Control.YCarrySelect.YCarrySelect },
      { S1Control.XCarrySelect.XCarrySelect,
        S1Control.YCarrySelect.YCarrySelect } };
  final public static int CarryEnable_Pass[][][] =
    { { S0Control.XCarrySelect.CARRY, S0Control.YCarrySelect.CARRY },
      { S1Control.XCarrySelect.CARRY, S1Control.YCarrySelect.CARRY } };
  final public static int CarryEnable_Modify[][][] =
    { { S0Control.XCarrySelect.LUT_CONTROL,
        S0Control.YCarrySelect.LUT_CONTROL },
      { S1Control.XCarrySelect.LUT_CONTROL,
        S1Control.YCarrySelect.LUT_CONTROL } };
  // select carry value, are only slice dependent, not Lut dependent
  //   so indexed [Sli] only
  final public static int CarryValue[][][] =
    { S0Control.AndMux.AndMux, S1Control.AndMux.AndMux };
  final public static int CarryValue_Zero[][] =
    { S0Control.AndMux.ZERO, S1Control.AndMux.ZERO };
  final public static int CarryValue_One[][] =
    { S0Control.AndMux.ONE, S1Control.AndMux.ONE };
  final public static int CarryValue_In1[][] =
    { S0Control.AndMux.IN1, S1Control.AndMux.IN1 };
  final public static int CarryValue_In1AndIn2[][] =
    { S0Control.AndMux.IN1_AND_IN2, S1Control.AndMux.IN1_AND_IN2 };
  // select carry input, are only slice dependent, not Lut dependent
  //   so indexed [Sli] only
  final public static int CarryBegin[][][] =
    { S0Control.Cin.Cin, S1Control.Cin.Cin };
  final public static int CarryBegin_PrevCarry[][] =
    { S0Control.Cin.CIN, S1Control.Cin.CIN };
  final public static int CarryBegin_FromInBx[][] =
    { S0Control.Cin.BX, S1Control.Cin.BX };

  // invert auxiliary inputs
  //   LUT level, indexed [Sli][Lut], BX belongs to F and BY belongs to G
  final public static int InBInvert[][][][] =
    { { S0Control.BxInvert, S0Control.ByInvert },
      { S1Control.BxInvert, S1Control.ByInvert } };
  final public static int InBInvert_Off[][][] =
    { { S0Control.OFF, S0Control.OFF },
      { S1Control.OFF, S1Control.OFF } };
  final public static int InBInvert_On[][][] =
    { { S0Control.ON, S0Control.ON },
      { S1Control.ON, S1Control.ON } };

  // select main output
  //   LUT level, indexed [Sli][Lut], X belongs to F and Y belongs to G
  final public static int OutFrom[][][][] =
    { { S0Control.X.X, S0Control.Y.Y },
      { S1Control.X.X, S1Control.Y.Y } };
  final public static int OutFrom_Lut[][][] =
    { { S0Control.X.FOUT, S0Control.Y.GOUT },
      { S1Control.X.FOUT, S1Control.Y.GOUT } };
  // F LUT position gives the F5 mux, G LUT position gives the F6 mux
  final public static int OutFrom_F56Mux[][][] =
    { { S0Control.X.F5, S0Control.Y.F6 },
      { S1Control.X.F5, S1Control.Y.F6 } };
  final public static int OutFrom_LutXorCarry[][][] =
    { { S0Control.X.FOUT_XOR_CARRY, S0Control.Y.GOUT_XOR_CARRY },
      { S1Control.X.FOUT_XOR_CARRY, S1Control.Y.GOUT_XOR_CARRY } };

  // select Q output
  //   LUT level, indexed [Sli][Lut]
  final public static int OutQFrom[][][][] =
    { { S0Control.XDin.XDin, S0Control.YDin.YDin },
      { S1Control.XDin.XDin, S1Control.YDin.YDin } };
  final public static int OutQFrom_Out[][][] =
    { { S0Control.XDin.X, S0Control.YDin.Y },
      { S1Control.XDin.X, S1Control.YDin.Y } };
  final public static int OutQFrom_InB[][][] =
    { { S0Control.XDin.BX, S0Control.YDin.BY },
      { S1Control.XDin.BX, S1Control.YDin.BY } };

  // select B output, are only slice dependent, not Lut dependent
  //   so indexed [Sli] only, only the YB constants are used here
  final public static int OutBFrom[][][] =
    { S0Control.YB.YB, S1Control.YB.YB };
  final public static int OutBFrom_Carry[][] =
    { S0Control.YB.COUT, S1Control.YB.COUT };
  final public static int OutBFrom_InBy[][] =
    { S0Control.YB.BY, S1Control.YB.BY };

  // mode setting for flip-flop
  //   slice level, indexed [Sli]
  final public static int FlipflopLatch[][][] =
    { S0Control.LatchMode, S1Control.LatchMode };
  final public static int FlipflopLatch_Off[][] =
    { S0Control.OFF, S1Control.OFF };
  final public static int FlipflopLatch_On[][] =
    { S0Control.ON, S1Control.ON };
  // set FFs to synchronous reset
  //   slice level, indexed [Sli]
  final public static int FlipflopSyncreset[][][] =
    { S0Control.Sync, S1Control.Sync };
  final public static int FlipflopSyncreset_Off[][] =
    { S0Control.OFF, S1Control.OFF };
  final public static int FlipflopSyncreset_On[][] =
    { S0Control.ON, S1Control.ON };
  // set state after set/reset signal
  //   LUT level, indexed [Sli][Lut], X FF belongs to F and Y FF belongs to G
  final public static int FlipflopResetTo[][][][] =
    { { S0Control.XffSetResetSelect, S0Control.YffSetResetSelect },
      { S1Control.XffSetResetSelect, S1Control.YffSetResetSelect } };
  final public static int FlipflopResetTo_GSR0InB1[][][] =
    { { S0Control.OFF, S0Control.OFF },
      { S1Control.OFF, S1Control.OFF } };
  final public static int FlipflopResetTo_GSR1InB0[][][] =
    { { S0Control.ON, S0Control.ON },
      { S1Control.ON, S1Control.ON } };
  // enable set/reset FFs from BY, are only slice dependent, not Lut dependent
  //   so indexed [Sli] only
  final public static int FlipflopResetByInB[][][] =
    { S0Control.InvertedSetReset, S1Control.InvertedSetReset };
  final public static int FlipflopResetByInB_Off[][] =
    { S0Control.OFF, S1Control.OFF };
  final public static int FlipflopResetByInB_On[][] =
    { S0Control.ON, S1Control.ON };

  // invert CLK, CE, SR, are only slice dependent, not Lut dependent
  //   so indexed [Sli] only
  // note that the SR setting is the reverse sense, it is an "not invert" bit
  final public static int ClockInvert[][][] =
    { S0Control.ClockInvert, S1Control.ClockInvert };
  final public static int ClockInvert_Off[][] =
    { S0Control.OFF, S1Control.OFF };
  final public static int ClockInvert_On[][] =
    { S0Control.ON, S1Control.ON };
  final public static int CeInvert[][][] =
    { S0Control.CeInvert, S1Control.CeInvert };
  final public static int CeInvert_Off[][] =
    { S0Control.OFF, S1Control.OFF };
  final public static int CeInvert_On[][] =
    { S0Control.ON, S1Control.ON };
  final public static int SrNonInvert[][][] =
    { S0Control.SrWeNotInvert, S1Control.SrWeNotInvert };
  final public static int SrNonInvert_Off[][] =
    { S0Control.OFF, S1Control.OFF };
  final public static int SrNonInvert_On[][] =
    { S0Control.ON, S1Control.ON };

  // config bit constants for setting clock source for a slice

  // select clock source, when from global clocks, are only slice dependent
  //   so indexed [Sli] only, one value constant per global clock GCLK0..3
  final public static int ClockFrom[][][] =
    { S0Clk.S0Clk, S1Clk.S1Clk };
  final public static int ClockFrom_GCLK0[][] =
    { S0Clk.GCLK0, S1Clk.GCLK0 };
  final public static int ClockFrom_GCLK1[][] =
    { S0Clk.GCLK1, S1Clk.GCLK1 };
  final public static int ClockFrom_GCLK2[][] =
    { S0Clk.GCLK2, S1Clk.GCLK2 };
  final public static int ClockFrom_GCLK3[][] =
    { S0Clk.GCLK3, S1Clk.GCLK3 };


  // config bit constants for
  //   Fpga.set(Row, Col, LutFunction[Sli][Lut], function);
  // selects which of the 4 LUTs of an CLB gets its function set, used by lut()
  //   first row is slice 0 F/G, second row is slice 1 F/G

  final public static int LutFunction[][][][] =
    { { LUT.SLICE0_F, LUT.SLICE0_G },
      { LUT.SLICE1_F, LUT.SLICE1_G } };


  // pin constants for new Pin(Pin.CLB, Row, Col, pin[Sli][Lut]);
  //   two dimensional ones are indexed [Sli][Lut] and are used with pin_l()
  //   one dimensional ones are indexed [Sli] and are used with pin_s()

  // 4 LUT inputs
  final public static int PinIn1[][] =
    { { CenterWires.S0_F1, CenterWires.S0_G1 },
      { CenterWires.S1_F1, CenterWires.S1_G1 } };
  final public static int PinIn2[][] =
    { { CenterWires.S0_F2, CenterWires.S0_G2 },
      { CenterWires.S1_F2, CenterWires.S1_G2 } };
  final public static int PinIn3[][] =
    { { CenterWires.S0_F3, CenterWires.S0_G3 },
      { CenterWires.S1_F3, CenterWires.S1_G3 } };
  final public static int PinIn4[][] =
    { { CenterWires.S0_F4, CenterWires.S0_G4 },
      { CenterWires.S1_F4, CenterWires.S1_G4 } };

  // auxiliary B inputs, BX belongs to F and BY belongs to G
  final public static int PinInB[][] =
    { { CenterWires.S0_BX, CenterWires.S0_BY },
      { CenterWires.S1_BX, CenterWires.S1_BY } };
  // special inputs, are only slice dependent, not Lut dependent
  final public static int PinClk[] =
    { CenterWires.S0_CLK, CenterWires.S1_CLK };
  final public static int PinCe[] =
    { CenterWires.S0_CE, CenterWires.S1_CE };
  final public static int PinSr[] =
    { CenterWires.S0_SR, CenterWires.S1_SR };

  // outputs, X/XQ/XB belong to F and Y/YQ/YB belong to G
  final public static int PinOut[][] =
    { { CenterWires.S0_X, CenterWires.S0_Y },
      { CenterWires.S1_X, CenterWires.S1_Y } };
  final public static int PinOutQ[][] =
    { { CenterWires.S0_XQ, CenterWires.S0_YQ },
      { CenterWires.S1_XQ, CenterWires.S1_YQ } };
  final public static int PinOutB[][] =
    { { CenterWires.S0_XB, CenterWires.S0_YB },
      { CenterWires.S1_XB, CenterWires.S1_YB } };


// ------ auxilary code for 0/1 slice and F/G LUT independant coding section

  // routines to hide the use of Col:Sli|Row:Lut when setting config bits
  //   one each for logic cell level and slice level config bits

  // set an logic cell (LUT) level config bit at the present position
  //   What is the setting, To is the value, both are indexed [Sli][Lut] here
  // an ConfigurationException is only reported on the console, not passed on
  public static void lcell(int What[][][][], int To[][][]) {
    try {
      int Setting[][] = What[Sli][Lut];
      int Value[] = To[Sli][Lut];
      Fpga.set(Row, Col, Setting, Value);
    } catch (ConfigurationException Failure) {
      System.out.println("Configuration exeption in lcell() at " + pos());
      System.out.println(Failure);
    }
  }

  // set an slice level config bit at the present position
  //   What is the setting, To is the value, both are indexed [Sli] here
  // an ConfigurationException is only reported on the console, not passed on
  public static void slice(int What[][][], int To[][]) {
    try {
      int Setting[][] = What[Sli];
      int Value[] = To[Sli];
      Fpga.set(Row, Col, Setting, Value);
    } catch (ConfigurationException Failure) {
      System.out.println("Configuration exeption in slice() at " + pos());
      System.out.println(Failure);
    }
  }

  // routines to hide the use of Col:Sli|Row:Lut when defining pins
  //   one each for logic cell level and slice level affecting pins

  // make an Pin for an logic cell (LUT) level pin at the present position
  //   Which is one of the two dimensional pin constants, indexed [Sli][Lut]
  public static Pin pin_l(int Which[][]) {
    int Wire = Which[Sli][Lut];
    Pin Made = new Pin(Pin.CLB, Row, Col, Wire);
    return Made;
  }

  // make an Pin for an slice level pin at the present position
  //   Which is one of the one dimensional pin constants, indexed [Sli]
  public static Pin pin_s(int Which[]) {
    int Wire = Which[Sli];
    Pin Made = new Pin(Pin.CLB, Row, Col, Wire);
    return Made;
  }


// ------ auxilary code for LUT function generator setting section

  // allow setting LUTs faster than com.xilinx.JBits.Virtex.Expr
  //   and no run time parsing, no run time parse errors to test and trap for
  //   and also not F/G LUT dependant, as an additional advantage

  // bit patterns for out = f(I1..I4), use Java ~ & | and ^ to combine them
  //   in each pattern bit n (0..15) is set exactly when one bit of n is 1
  //   I1 follows bit 0 of n, I2 bit 1, I3 bit 2 and I4 bit 3
  //   example: I1 & I2 gives 0x8888, set only where both inputs are 1
  final public static int I1 = 0xAAAA, I2 = 0xCCCC, I3 = 0xF0F0, I4 = 0xFF00;

  // routine to hide the use of Col:Sli|Row:Lut when setting LUT function
  //   also hides use of Util.InvertIntArray and Util.IntToIntArray
  // as side effect writes out this LUTs position/value/name into an listfile

  // set the function of the LUT at the present position and list it
  //   Function is the LUT bit pattern, 16 bits of it are used
  //   Name is only written to the listfile
  // an ConfigurationException is only reported on the console, not passed on
  //   the listfile line is written in any case
  public static void lut(int Function, String Name) {
    try {
      Fpga.set(
        Row, Col,
        LutFunction[Sli][Lut],
        Util.InvertIntArray(Util.IntToIntArray(Function, 16)));
    } catch (ConfigurationException Failure) {
      System.out.println("Configuration exeption in lut() at " + pos());
      System.out.println(Failure);
    }
    // listfile line is: position, value as 4 hex digits, name
    // front-fill the hex value with zeros, then keep only the last 4 digits
    String HexText = "0000" + Integer.toHexString(Function);
    String HexField = HexText.substring(HexText.length() - 4);
    ListFile.println(pos() + " " + HexField + " " + Name);
  }


// ------ auxilary code for LUT comment writting section

  // allow putting text bit patterns into LUTs
  //   for layout display in BoardScope or chip viewer
  // uses LUT 16 bits displayed 0123/4567/89AB/CDEF as 4x4 pixel font

  // 4x4 pixel bit patterns, 0xabcd a=bottom d=top line, 1=left 8=right column
  // table is indexed by character code, luttext() uses WriteFont[Char]
  //   128 entries, so only codes 0..127 have an pattern
  //   lower case letters have the same patterns as upper case ones

  final public static int WriteFont[] = {
    // all 32 control characters as full boxes
    0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    // space ! " # $ % & ' ( ) * + , - . /
    0x0000, 0x6066, 0x00AA, 0x6FF6, 0x7C5E, 0x9249, 0xE526, 0x0024,
    0x4224, 0x2442, 0x4EE4, 0x04E4, 0x2400, 0x00E0, 0x6600, 0x1248,
    // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
    0x6996, 0xE464, 0xF687, 0x786F, 0x4F51, 0x787F, 0x6972, 0x448F,
    0x6966, 0x4E96, 0x0404, 0x2404, 0x2124, 0xF0F0, 0x2484, 0x4496,
    // @ A B C D E F G H I J K L M N O
    0x6FFE, 0x9F96, 0x79F7, 0xE11E, 0x7997, 0xF17F, 0x171F, 0xE91E,
    0x99F9, 0xE44E, 0x344E, 0x9759, 0xF111, 0x9BF9, 0x9DB9, 0x6996,
    // P Q R S T U V W X Y Z [ \ ] ^ _
    0x1797, 0x6D96, 0x9797, 0x7C3E, 0x444F, 0x6999, 0x4699, 0x6DD9,
    0x9669, 0x4469, 0xF24F, 0x6226, 0x8421, 0x6446, 0x00A4, 0xF000,
    // ` a b c d e f g h i j k l m n o, a-o use A-O
    0x0042, 0x9F96, 0x79F7, 0xE11E, 0x7997, 0xF17F, 0x171F, 0xE91E,
    0x99F9, 0xE44E, 0x344E, 0x9759, 0xF111, 0x9BF9, 0x9DB9, 0x6996,
    // p q r s t u v w x y z { | } ~ del, p-z use P-Z, del as full box
    0x1797, 0x6D96, 0x9797, 0x7C3E, 0x444F, 0x6999, 0x4699, 0x6DD9,
    0x9669, 0x4469, 0xF24F, 0x6236, 0x4444, 0x64C6, 0x005A, 0xFFFF };

  // simple routine to hide implementation

  // put the 4x4 pixel pattern of an character into the present LUT
  //   Char is the character to show, Name is passed on to lut() for listing
  // WriteFont only has patterns for the character codes it is long enough for
  //   any code beyond its end is shown as full box, same as the control
  //   characters and del, instead of failing with an array index exception
  public static void luttext(char Char, String Name) {
    // char is never negative, so only the upper bound needs testing
    int Pattern = (Char < WriteFont.length) ? WriteFont[Char] : 0xFFFF;
    lut(Pattern, Name); }


// ------ auxilary code for hiding Router object section

  // simple routine to hide JRoute2 object and RouteException error
  //   one each for single and array target pins

  // route an connection from one source pin to one target pin
  // an RouteException is only reported on the console, not passed on
  //   the position shown is the present placing position, see pos()
  public static void route(Pin From, Pin To) {
    try {
      Router.route(From, To);
      return;
    } catch (RouteException Failure) {
      System.out.println("Routing exeption in route() at " + pos());
      System.out.println(Failure);
    }
  }

  // route an connection from one source pin to an array of target pins
  // an RouteException is only reported on the console, not passed on
  //   the position shown is the present placing position, see pos()
  public static void routem(Pin From, Pin ToArray[]) {
    try {
      Router.route(From, ToArray);
      return;
    } catch (RouteException Failure) {
      System.out.println("Routing exeption in routem() at " + pos());
      System.out.println(Failure);
    }
  }


// ------ actual PDP-10 design implementation sections from here on

  public static void pdp10() {


// ------ debugging configuration section

    // drive various FFs, for debugging display in BoardScope or chip viewer
    //   do not drive them in production systems, to reduce heat generation
    final int DebugDisp = 1;

    // prevent writing of RAMs, making them to ROMs, no need to track changes
    final int DebugMemROM = 0;
    final int DebugFmemROM = 0;


// ------ set up clocking section

    // set which clock of our board we will be using for its main clock
    final int SysClock[][] = ClockFrom_GCLK1;


// ------ setting up the PDP-10 system constants section

    // data and address bus widths
    final int DataBits = 36, AddrBits = 18;

    // PDP-10 is big endian, so data bits are numbered from MSB = 0 to LSB = 35
    final int DataMSB = 0, DataLSB = DataMSB+DataBits-1;

    // operators console panel shows address numbered from MSB = 18 to LSB = 35
    final int AddrMSB = 18, AddrLSB = AddrMSB+AddrBits-1;


// ------ read-in test program and data memory image section

    // read in an test data and program memory image from an separate file
    // file formatted as strings of 36 0/1 chars, one for each word

    // presently 0..47=48 (octal 000..057=060) words of memory
    final String RinFileName = "pdp10.mem";
    final int RinBegin = 000, RinEnd = 057;
    String RinImage[] = new String[RinEnd+1];

    // where to start, for setting initial program counter
    // defend against no words in file, set to an safe value
    int RinStartAt = 000;

    System.out.print("read-in " + RinBegin);

    try {
      BufferedReader RinFile = new BufferedReader(
        new FileReader(RinFileName));
      for (int RinWord = RinBegin; RinWord <= RinEnd; RinWord++) {
        // defend against too short file, ensure some content
        RinImage[RinWord] = "000000000000000000000000000000000000";
        String RinLine;
        if ((RinLine = RinFile.readLine()) != null) {
          // defend against too short lines, pad them out
          RinLine = RinLine+"000000000000000000000000000000000000";
          RinLine = RinLine.substring(0, DataBits);
          RinImage[RinWord] = RinLine;
          // set program start address to last read-in word
          RinStartAt = RinWord; } }
      RinFile.close(); }
    catch (FileNotFoundException Fe) {
      System.out.println("Memory image File " + RinFileName + " not found");
      System.out.println(Fe); }
    catch (IOException Io) {
      System.out.println("Error reading the memory image " + RinFileName);
      System.out.println(Io); }

    System.out.println(".." + RinEnd + ", start at " + RinStartAt);


// ------ numbering data path bits section

    // place text numbering bits, visible in BoardScope or chip viewer

    // Xilinx carry chains LSB->MSB = south->north together with
    //   south->north row allocation, results in building data path LSB->MSB
    // PDP-10 has big endian bit numbering, MSB=0, LSB=35
    //   so row 0 F LUT is LSB bit 35 .. row 17 G LUT is MSB bit 0

    // first step, tens 00, 10, 20, 30

    // announce what we are doing and where we are
    System.out.print("Bit Numbering, Tens: data " + pos());

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      if (Bit == 30) {
        luttext('3', "BitnumTens"+Bit); }
      if (Bit == 20) {
        luttext('2', "BitnumTens"+Bit); }
      if (Bit == 10) {
        luttext('1', "BitnumTens"+Bit); }
      if (Bit == 00) {
        luttext('0', "BitnumTens"+Bit); }
      nextlut(); }

    System.out.println(", unused " + pos());

    // tag this column:slice with an single char of text, N = Numbering
    //   so that we can easily find this section, in BoardScope or chip viewer
    // place tag in the last/top row:LUT of the FPGA
    lastlut();
    luttext('N', "tag Data Path Bit Numbering, Tens");

    // reserve space for this sections logic and restart row/LUT allocation
    nextsli();


    // second step, ones 0-9

    System.out.print("Bit Numbering 2, Ones: data " + pos());

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // yes, this algorithm is primitive
      //   but cut&paste is faster than looking up string operations
      if (Bit == 35 || Bit == 25 || Bit == 15 || Bit == 5 ) {
        luttext('5', "BitnumOnes"+Bit); }
      if (Bit == 34 || Bit == 24 || Bit == 14 || Bit == 4 ) {
        luttext('4', "BitnumOnes"+Bit); }
      if (Bit == 33 || Bit == 23 || Bit == 13 || Bit == 3 ) {
        luttext('3', "BitnumOnes"+Bit); }
      if (Bit == 32 || Bit == 22 || Bit == 12 || Bit == 2 ) {
        luttext('2', "BitnumOnes"+Bit); }
      if (Bit == 31 || Bit == 21 || Bit == 11 || Bit == 1 ) {
        luttext('1', "BitnumOnes"+Bit); }
      if (Bit == 30 || Bit == 20 || Bit == 10 || Bit == 0 ) {
        luttext('0', "BitnumOnes"+Bit); }
      if (Bit == 29 || Bit == 19 || Bit == 9 ) {
        luttext('9', "BitnumOnes"+Bit); }
      if (Bit == 28 || Bit == 18 || Bit == 8 ) {
        luttext('8', "BitnumOnes"+Bit); }
      if (Bit == 27 || Bit == 17 || Bit == 7 ) {
        luttext('7', "BitnumOnes"+Bit); }
      if (Bit == 26 || Bit == 16 || Bit == 6 ) {
        luttext('6', "BitnumOnes"+Bit); }
      nextlut(); }

    System.out.println(", unused " + pos());

    lastlut();
    luttext('2', "tag Data Path Bit Numbering 2, Ones");

    nextsli();


// ------ memory section

    // memory, 32 words for program and data
    // runs as octal 000..037 and then repeats as 040..077, 0100..0137, ...
    //   000..017 used by fast memory, so use this memory at 020..057
    // temporary on-chip memory for testing, LUT-RAM based
    //   later will be 512 word BRAM and even later large off-chip SRAM or DRAM

    // announce what we are doing and where we are
    System.out.print("Memory: data " + pos());

    // data path 2 16bit LUT-RAMs per bit, F5-Mux to select, no further logic

    // use array of pins for each single LUT input or output to be addressed
    // duplicate 35..32 address definitions because actually 2*16word memory
    Pin MemData_Addr16[] = new Pin[DataBits],
        MemData_Addr8F[] = new Pin[DataBits],
        MemData_Addr8G[] = new Pin[DataBits],
        MemData_Addr4F[] = new Pin[DataBits],
        MemData_Addr4G[] = new Pin[DataBits],
        MemData_Addr2F[] = new Pin[DataBits],
        MemData_Addr2G[] = new Pin[DataBits],
        MemData_Addr1F[] = new Pin[DataBits],
        MemData_Addr1G[] = new Pin[DataBits],
        MemData_Mdwm[] = new Pin[DataBits],
        MemData_ClkE[] = new Pin[DataBits],
        MemData[] = new Pin[DataBits];

    // build the memory data path, one F/G LUT pair per data bit
    //   F LUT is preset from RinImage words 040..057, G LUT from 020..037
    //   every input/output pin used is recorded in the MemData_* arrays
    //     so that the address buffers and ClkE control below can be routed
    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // use F5-Mux, needs fitting F/G LUT pair, so align to F LUT
      alignlutf();
      // lower 4 address bits, for selecting 16bits in F LUT
      MemData_Addr1F[Bit] = pin_l(PinIn1);
      MemData_Addr2F[Bit] = pin_l(PinIn2);
      MemData_Addr4F[Bit] = pin_l(PinIn3);
      MemData_Addr8F[Bit] = pin_l(PinIn4);
      // insert first half of memory contents into F LUTs
      int MemDataPresetF = 0;
      // the address constants are octal, notice the leading zero
      // first 16 words become 3rd 16 words, because fast memory and addr wrap
      //   word 057 is shifted in first and so ends up as preset bit 15
      //   word 040 is shifted in last and so ends up as preset bit 0
      for (int Line = 057; Line >= 040; Line--) {
        // column Bit of the image line, any char other than '1' counts as 0
        char MemBitChar = RinImage[Line].charAt(Bit);
        int MemBit = (MemBitChar == '1') ? 1 : 0;
        // shift in bit from LSB of LUT
        MemDataPresetF = MemDataPresetF*2+MemBit; }
      lut(MemDataPresetF, "MemData"+Bit+".F");
      // configure LUT pairs to be 32bit LUT-RAMs
      slice(LutModeLutRamDualRam32, LutModeLutRamDualRam32_On);
      slice(LutModeLut, LutModeLut_Off);
      lcell(LutModeRam, LutModeRam_On);
      slice(LutModeRam32, LutModeRam32_On);
      // make LUT-RAMs writable
      MemData_ClkE[Bit] = pin_s(PinCe);
      // and clock them
      slice(ClockFrom, SysClock);
      // BX 5th address bit, for read F5-Mux and write enable selector
      MemData_Addr16[Bit] = pin_l(PinInB);
      // invert BX, because F5-Mux is wired 0/1=G/F
      lcell(InBInvert, InBInvert_On);
      lcell(OutFrom, OutFrom_F56Mux);
      // the read data of this bit is taken after the F5-Mux
      MemData[Bit] = pin_l(PinOut);
      // now the G LUT of the pair, same 4 address inputs as the F LUT
      secondlut();
      MemData_Addr1G[Bit] = pin_l(PinIn1);
      MemData_Addr2G[Bit] = pin_l(PinIn2);
      MemData_Addr4G[Bit] = pin_l(PinIn3);
      MemData_Addr8G[Bit] = pin_l(PinIn4);
      // insert second half of memory contents into G LUTs
      //   same shifting as for F, word 037 becomes bit 15, word 020 bit 0
      int MemDataPresetG = 0;
      for (int Line = 037; Line >= 020; Line--) {
        char MemBitChar = RinImage[Line].charAt(Bit);
        int MemBit = (MemBitChar == '1') ? 1 : 0;
        MemDataPresetG = MemDataPresetG*2+MemBit; }
      lut(MemDataPresetG, "MemData"+Bit+".G");
      // set G LUT also to be LUT-RAM
      lcell(LutModeRam, LutModeRam_On);
      // BY data input for writing to LUT-RAMs
      MemData_Mdwm[Bit] = pin_l(PinInB);
      if (DebugDisp == 1) {
        // XQ automatically shows old memory data
        //   make YQ show new memory data written
        lcell(OutQFrom, OutQFrom_InB); }
      // 2 LUTs per bit, requires using 2 slices, gives zigzag placing of bits
      //   bit 35/33/../1 in first slice, bit 34/32/../0 in second slice
      //   zigzag placing of bit pairs 35/34, 33/32, .., 1/0
      // the  == 1  is to convert int 1 into boolean true
      nextzigzagsli(Bit%2 == 1); }

    System.out.print(", control " + pos());

    // array for wiring address bits to, from Mam
    //   Address bus goes 18..35, Java Arrays 0..n, so make these DataBits wide
    Pin MemAddr_Mam[] = new Pin[DataBits];

    // comments with  // #  are for auto-generating pdp10.lf by the lf script
    // #buffer Mem-Addr16 = MAM-Addr.31
    //   no logic, just buffer address bits to reduce fan-out of MAM outputs
    //     also hide distribution to multiple targets, and gives MemAddr array
    MemAddr_Mam[31] = pin_l(PinIn1);
    lut(I1, "MemAddr16");
    Pin MemAddr16 = pin_l(PinOut);
    routem(MemAddr16, MemData_Addr16);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer Mem-Addr8 = MAM-Addr.32
    MemAddr_Mam[32] = pin_l(PinIn1);
    lut(I1, "MemAddr8");
    Pin MemAddr8 = pin_l(PinOut);
    routem(MemAddr8, MemData_Addr8F);
    routem(MemAddr8, MemData_Addr8G);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer Mem-Addr4 = MAM-Addr.33
    MemAddr_Mam[33] = pin_l(PinIn1);
    lut(I1, "MemAddr4");
    Pin MemAddr4 = pin_l(PinOut);
    routem(MemAddr4, MemData_Addr4F);
    routem(MemAddr4, MemData_Addr4G);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer Mem-Addr2 = MAM-Addr.34
    MemAddr_Mam[34] = pin_l(PinIn1);
    lut(I1, "MemAddr2");
    Pin MemAddr2 = pin_l(PinOut);
    routem(MemAddr2, MemData_Addr2F);
    routem(MemAddr2, MemData_Addr2G);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer Mem-Addr1 = MAM-Addr.35
    MemAddr_Mam[35] = pin_l(PinIn1);
    lut(I1, "MemAddr1");
    Pin MemAddr1 = pin_l(PinOut);
    routem(MemAddr1, MemData_Addr1F);
    routem(MemAddr1, MemData_Addr1G);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #control ClkE-Mem = (~Mdrm-Mux=R)&MdwmClkE-Mem
    Pin MemClkE_MdrmSelFmem = pin_l(PinIn1);
    Pin MemClkE_MdwmMemClkE = pin_l(PinIn2);
    if (DebugMemROM == 1) {
      lut(0x0000, "MemClkE, forced to allways disabled by debug"); }
    else {
      lut((~I1)&I2, "MemClkE"); }
    Pin MemClkE = pin_l(PinOut);
    routem(MemClkE, MemData_ClkE);
    if (DebugDisp == 1) {
      // show whether this control is triggering what it controls
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('M', "tag Memory");

    // reserve space for this section's logic and restart row/LUT allocation
    //   used 2 slices because of zigzagging, this and next, so alloc 2 slices
    nextsli(); nextsli();


// ------ fast memory section

    // fast memory, 16 words, for accelerating accumulators/index registers
    // at present this is a KA-10 style "bolt on the side" design
    //   the rest of the processor does not know about this fast memory
    //   in particular accumulator-only data is addressed through the MAM
    //   and there is no same-time fetch of C(AC) and C(MA) from both memories

    System.out.print("Fast memory: data " + pos());

    // data path 1 LUT-RAM per bit, no further logic

    Pin FmemData_Addr8[] = new Pin[DataBits],
        FmemData_Addr4[] = new Pin[DataBits],
        FmemData_Addr2[] = new Pin[DataBits],
        FmemData_Addr1[] = new Pin[DataBits],
        FmemData_Mdwm[] = new Pin[DataBits],
        FmemData_ClkE[] = new Pin[DataBits/2],
        FmemData[] = new Pin[DataBits];

    // build the fast memory data path, a single LUT per data bit
    //   LUT is preset from RinImage words 000..017
    //   pins are recorded in the FmemData_* arrays for routing further down
    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // 4 address bits, for selecting 1 of the 16 bits in the LUT
      FmemData_Addr1[Bit] = pin_l(PinIn1);
      FmemData_Addr2[Bit] = pin_l(PinIn2);
      FmemData_Addr4[Bit] = pin_l(PinIn3);
      FmemData_Addr8[Bit] = pin_l(PinIn4);
      // insert fast memory contents into LUTs
      //   word 017 is shifted in first and so ends up as preset bit 15
      //   word 000 is shifted in last and so ends up as preset bit 0
      int FmemDataPreset = 0;
      for (int Line = 017; Line >= 000; Line--) {
        // column Bit of the image line, any char other than '1' counts as 0
        char FmemBitChar = RinImage[Line].charAt(Bit);
        int FmemBit = (FmemBitChar == '1') ? 1 : 0;
        // shift in bit from LSB of LUT
        FmemDataPreset = FmemDataPreset*2+FmemBit; }
      lut(FmemDataPreset, "FmemData"+Bit);
      // configure LUTs to be 16bit LUT-RAMs
      slice(LutModeLutRamDualRam32, LutModeLutRamDualRam32_Off);
      slice(LutModeLut, LutModeLut_Off);
      lcell(LutModeRam, LutModeRam_On);
      // BY data input for writing to LUT-RAMs
      FmemData_Mdwm[Bit] = pin_l(PinInB);
      // make LUT-RAMs writable
      //   taken on odd bits only, Bit/2 indexes the DataBits/2 wide array
      if (Bit%2 == 1) {
        // this must be only once per slice, as only 1 CE pin per slice
        FmemData_ClkE[Bit/2] = pin_s(PinCe); }
      // and clock them
      slice(ClockFrom, SysClock);
      // the read data of this bit comes straight from the LUT output
      FmemData[Bit] = pin_l(PinOut);
      nextlut(); }

    System.out.print(", control " + pos());

    Pin FmemAddr_Mam[] = new Pin[DataBits];

    // #buffer Fmem-Addr8 = MAM-Addr.32
    //   no logic, just buffer address bits to reduce fan-out of MAM outputs
    //     also hide distribution to multiple targets, and gives FmemAddr array
    FmemAddr_Mam[32] = pin_l(PinIn1);
    lut(I1, "FmemAddr8");
    Pin FmemAddr8 = pin_l(PinOut);
    routem(FmemAddr8, FmemData_Addr8);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer Fmem-Addr4 = MAM-Addr.33
    FmemAddr_Mam[33] = pin_l(PinIn1);
    lut(I1, "FmemAddr4");
    Pin FmemAddr4 = pin_l(PinOut);
    routem(FmemAddr4, FmemData_Addr4);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer Fmem-Addr2 = MAM-Addr.34
    FmemAddr_Mam[34] = pin_l(PinIn1);
    lut(I1, "FmemAddr2");
    Pin FmemAddr2 = pin_l(PinOut);
    routem(FmemAddr2, FmemData_Addr2);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer Fmem-Addr1 = MAM-Addr.35
    FmemAddr_Mam[35] = pin_l(PinIn1);
    lut(I1, "FmemAddr1");
    Pin FmemAddr1 = pin_l(PinOut);
    routem(FmemAddr1, FmemData_Addr1);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #control ClkE-Fmem = Mdrm-Mux=R&MdwmClkE-Fmem
    Pin FmemClkE_MdrmSelFmem = pin_l(PinIn1);
    Pin FmemClkE_MdwmMemClkE = pin_l(PinIn2);
    if (DebugFmemROM == 1) {
      lut(0x0000, "FmemClkE, forced to allways disabled by debug"); }
    else {
      lut(I1&I2, "FmemClkE"); }
    Pin FmemClkE = pin_l(PinOut);
    routem(FmemClkE, FmemData_ClkE);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('F', "tag Fast Memory");

    nextsli();

 
// ------ memory data read mux section

    // switch read data between memory and fast memory

    System.out.print("Memory Data Read Mux: data " + pos());

    // data path 2:1-Mux with common select line, in from memory or fast memory

    Pin MdrmData_Mem[] = new Pin[DataBits],
        MdrmData_Fmem[] = new Pin[DataBits],
        MdrmData_SelFmem[] = new Pin[DataBits],
        MdrmData[] = new Pin[DataBits];

    // get around JBits "feature" that it runs out of routing resources
    // so many connections from MdrmData that it runs out of output muxes
    //   this is because JBits routes each route from scratch from XQ/YQ Pin
    //     and so the 8 output muxes per CLB become too limited
    // so make memory bus of DataBits arrays and route each bit once at end
    //   instead of route(MdrmData[?], <target-Pin-name>);
    //     use MdrmDataBus[Bit][MdrmDataPos[Bit]++] = <target-Pin-name>;
    //   at end then once do routem(MdrmData[Bit], MdrmDataBus[Bit]);
    // bus length of 100 is chosen to be way larger than ever needed
    //   will be shortened to right length before doing actual routing
    Pin MdrmDataBus[][] = new Pin[DataBits][100];
    // count and index using positions on the MdrmDataBus, for each Bit
    //   this is used for allocating space and at end for shortening
    int MdrmDataPos[] = new int[DataBits];

    // build the read mux data path, one LUT per data bit
    //   In1 = memory data, In2 = fast memory data, In3 = select fast memory
    //   both data inputs are routed right here, the select pins are only
    //     collected in MdrmData_SelFmem and routed by the control logic below
    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      MdrmData_Mem[Bit] = pin_l(PinIn1);
      route(MemData[Bit], MdrmData_Mem[Bit]);
      MdrmData_Fmem[Bit] = pin_l(PinIn2);
      route(FmemData[Bit], MdrmData_Fmem[Bit]);
      MdrmData_SelFmem[Bit] = pin_l(PinIn3);
      // 2:1-Mux, I3=0 passes memory (I1), I3=1 passes fast memory (I2)
      lut(I1&(~I3)|I2&I3, "MdrmData"+Bit);
      // output is not routed here, targets are collected in MdrmDataBus
      MdrmData[Bit] = pin_l(PinOut);
      if (DebugDisp == 1) {
        // show what data Mdrm sent to processor for last read
        slice(ClockFrom, SysClock); }
      nextlut(); }

    System.out.print(", control " + pos());

    Pin MdrmSelFmem_Mam[] = new Pin[DataBits];

    // #control MDRM-Mux=Fmem = addr<020 -> OR of address bits 18..31 = 0
    //   = NOR of 18-4=14 address bits = NOR(OR(18..25)+26..31)
    // first the 8-OR = 4-OR OR 4-OR
    // use F5-Mux as OR, needs fitting F/G LUT pair, so align to F LUT
    alignlutf();
    MdrmSelFmem_Mam[18] = pin_l(PinIn1);
    MdrmSelFmem_Mam[19] = pin_l(PinIn2);
    MdrmSelFmem_Mam[20] = pin_l(PinIn3);
    MdrmSelFmem_Mam[21] = pin_l(PinIn4);
    lut(I1|I2|I3|I4, "MdrmSelFmem1.F");
    // use F5-Mux as an OR of LUTs F and G
    Pin MdrmSelFmem1_FromG = pin_l(PinInB);
    // invert BX, because F5-Mux is wired 0/1=G/F
    lcell(InBInvert, InBInvert_On);
    lcell(OutFrom, OutFrom_F56Mux);
    Pin MdrmSelFmem1 = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    secondlut();
    MdrmSelFmem_Mam[22] = pin_l(PinIn1);
    MdrmSelFmem_Mam[23] = pin_l(PinIn2);
    MdrmSelFmem_Mam[24] = pin_l(PinIn3);
    MdrmSelFmem_Mam[25] = pin_l(PinIn4);
    lut(I1|I2|I3|I4, "MdrmSelFmem1.G");
    Pin MdrmSelFmem1G = pin_l(PinOut);
    route(MdrmSelFmem1G, MdrmSelFmem1_FromG);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // then the 7-NOR = 3-NOR AND 4-NOR
    // use F5-Mux as AND, needs fitting F/G LUT pair, so align to F LUT
    alignlutf();
    Pin MdrmSelFmem_Sel1 = pin_l(PinIn1);
    route(MdrmSelFmem1, MdrmSelFmem_Sel1);
    MdrmSelFmem_Mam[26] = pin_l(PinIn2);
    MdrmSelFmem_Mam[27] = pin_l(PinIn3);
    MdrmSelFmem_Mam[28] = pin_l(PinIn4);
    lut(~(I1|I2|I3|I4), "MdrmSelFmem.F");
    // use F5-Mux as an AND of LUTs F and G
    Pin MdrmSelFmem_FromG = pin_l(PinInB);
    // dont invert BX, because F5-Mux is already wired 0/1=G/F
    lcell(OutFrom, OutFrom_F56Mux);
    Pin MdrmSelFmem = pin_l(PinOut);
    routem(MdrmSelFmem, MdrmData_SelFmem);
    route(MdrmSelFmem, MemClkE_MdrmSelFmem);
    route(MdrmSelFmem, FmemClkE_MdrmSelFmem);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    secondlut();
    MdrmSelFmem_Mam[29] = pin_l(PinIn1);
    MdrmSelFmem_Mam[30] = pin_l(PinIn2);
    MdrmSelFmem_Mam[31] = pin_l(PinIn3);
    lut(~(I1|I2|I3), "MdrmSelFmem.G");
    Pin MdrmSelFmemG = pin_l(PinOut);
    route(MdrmSelFmemG, MdrmSelFmem_FromG);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('R', "tag Memory Data Read Mux");

    nextsli();


// ------ memory address mux section

    // select what address source addresses memory, PC or IR.X or MA or IR.AC
    //   extend 4bit addresses of IR.X and IR.AC with zeros to 18bit

    System.out.print("Memory Address Mux: data " + pos());

    // data path 4:1-Mux with 4 separate enables, wide-OR of 2 LUTs as 2ANDs+OR

    Pin MamAddr_Iridx[] = new Pin[DataBits],
        MamAddr_EnIridx[] = new Pin[AddrBits],
        MamAddr_Irac[] = new Pin[DataBits],
        MamAddr_EnIrac[] = new Pin[AddrBits],
        MamAddr_Prog[] = new Pin[DataBits],
        MamAddr_EnProg[] = new Pin[AddrBits],
        MamAddr_Maddr[] = new Pin[DataBits],
        MamAddr_EnMaddr[] = new Pin[AddrBits],
        MamAddr[] = new Pin[DataBits];

    // build the address mux data path, one F/G LUT pair per address bit
    //   F LUT inputs: IR.X with its enable, IR.AC with its enable
    //   G LUT inputs: PC with its enable, MA with its enable
    //   bits 35..32 configure both LUTs and take the result from PinOutB
    //   bits 31..18 give only the G LUT a function here, result from PinOut
    for (int Bit = AddrLSB; Bit >= AddrMSB; Bit--) {
      // use F5-Mux, needs fitting F/G LUT pair, so align to F LUT
      alignlutf();
      // use Bit-18, because Bit goes 35..18, not 17..0 as Java arrays go
      //   to save typing, all single-route arrays DataBits wide, ignore 0..17
      //   the enable arrays are only AddrBits wide, so these use Bit-18
      MamAddr_Iridx[Bit] = pin_l(PinIn1);
      MamAddr_EnIridx[Bit-18] = pin_l(PinIn2);
      MamAddr_Irac[Bit] = pin_l(PinIn3);
      MamAddr_EnIrac[Bit-18] = pin_l(PinIn4);
      if (Bit >= 32) {
        // lowest 4 bits 35..32: addressable by IR.AC or IR.X
        // MAM has 4 address inputs, use Wide-OR
        lut(~(I1&I2|I3&I4), "MamAddr"+Bit+".F");
        // wide-OR with LUT=0 -> !BX (=0) and LUT=1 -> AndMux.ONE (=1)
        lcell(CarryEnable, CarryEnable_Modify);
        slice(CarryValue, CarryValue_One);
        slice(CarryBegin, CarryBegin_FromInBx);
        lcell(InBInvert, InBInvert_On);
        if (DebugDisp == 1) {
          // show what address Mam selected for last memory access
          // make XQ show blank to not distract, take it from !BX = !1 = 0
          lcell(OutQFrom, OutQFrom_InB);
          slice(ClockFrom, SysClock); } }
      // now the G LUT of the pair, for the PC and MA inputs
      secondlut();
      MamAddr_Prog[Bit] = pin_l(PinIn1);
      MamAddr_EnProg[Bit-18] = pin_l(PinIn2);
      MamAddr_Maddr[Bit] = pin_l(PinIn3);
      MamAddr_EnMaddr[Bit-18] = pin_l(PinIn4);
      if (Bit >= 32) {
        // lowest 4 bits 35..32: addressable by IR.AC or IR.X, rest of Wide-OR
        lut(~(I1&I2|I3&I4), "MamAddr"+Bit+".G");
        lcell(CarryEnable, CarryEnable_Modify);
        MamAddr[Bit] = pin_l(PinOutB);
        // these 4 bits address both the memory and the fast memory
        route(MamAddr[Bit], MemAddr_Mam[Bit]);
        route(MamAddr[Bit], FmemAddr_Mam[Bit]);
        if (DebugDisp == 1) {
          // make YQ FF show MamMux, from YB via BY to YQ
          route(pin_l(PinOutB), pin_l(PinInB));
          lcell(OutQFrom, OutQFrom_InB); } }
      else {
        // other address bits: only 2 inputs PC or MA, zero if IR.AC or IR.X
        lut(I1&I2|I3&I4, "MamMux"+Bit);
        MamAddr[Bit] = pin_l(PinOut);
        if (Bit == 31) {
          // only bits up to here are used by the present temporary memory
          route(MamAddr[Bit], MemAddr_Mam[Bit]); }
        // bits 31..18 all feed the fast memory select detection of the Mdrm
        route(MamAddr[Bit], MdrmSelFmem_Mam[Bit]);
        if (DebugDisp == 1) {
          // XQ will be blank (F LUT=0), make YQ show MamMux (G LUT)
          slice(ClockFrom, SysClock); } }
      nextzigzagsli(Bit%2 == 1); }

    // jump the space for non-existent address bits
    for (int Bit = AddrMSB-1; Bit >= DataMSB; Bit--) {
      nextlut(); }

    System.out.print(", control " + pos());

    // #control MAM-Mux=PC = insMAM-Mux=PC
    //   only 1 input, but LUT to buffer signal to reduce fan-out load
    //     gives faster instruction decode, slower mux switching irrelevant
    Pin MamEnProg_Irma = pin_l(PinIn1);
    lut(I1, "MamEnProg");
    Pin MamEnProg = pin_l(PinOut);
    routem(MamEnProg, MamAddr_EnProg);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control MAM-Mux=IR.X = insMAM-Mux=IR.X
    Pin MamEnIridx_Irma = pin_l(PinIn1);
    lut(I1, "MamEnIridx");
    Pin MamEnIridx = pin_l(PinOut);
    routem(MamEnIridx, MamAddr_EnIridx);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control MAM-Mux=MA = ...|atestMAM-Mux=MA|logicMAM-Mux=MA|
    // #  hwordMAM-Mux=MA|btestMAM-Mux=MA|..|insMAM-Mux=MA
    // # this will grow with other instruction groups adding subparts
    // use F5-Mux as OR, needs fitting F/G LUT pair, so align to F LUT
    alignlutf();
    Pin MamEnMaddr_Atest = pin_l(PinIn1);
    Pin MamEnMaddr_Logic = pin_l(PinIn2);
    Pin MamEnMaddr_Hword = pin_l(PinIn3);
    Pin MamEnMaddr_Btest = pin_l(PinIn4);
    lut(I1|I2|I3|I4, "MamEnMaddr.F");
    // use F5-Mux as an OR of LUTs F and G
    Pin MamEnMaddr_FromG = pin_l(PinInB);
    // invert BX, because F5-Mux is wired 0/1=G/F
    lcell(InBInvert, InBInvert_On);
    lcell(OutFrom, OutFrom_F56Mux);
    Pin MamEnMaddr = pin_l(PinOut);
    routem(MamEnMaddr, MamAddr_EnMaddr);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    secondlut();
    Pin MamEnMaddr_Irma = pin_l(PinIn4);
    lut(I4, "MamEnMaddr.G");
    Pin MamEnMaddrG = pin_l(PinOut);
    route(MamEnMaddrG, MamEnMaddr_FromG);
    nextlut();
    // #control MAM-Mux=IR.AC = ...|atestMAM-Mux=IR.AC|logicMAM-Mux=IR.AC|
    // #  hwordMAM-Mux=IR.AC|btestMAM-Mux=IR.AC|...
    // # this will grow with other instruction groups adding subparts
    Pin MamEnIrac_Atest = pin_l(PinIn1);
    Pin MamEnIrac_Logic = pin_l(PinIn2);
    Pin MamEnIrac_Hword = pin_l(PinIn3);
    Pin MamEnIrac_Btest = pin_l(PinIn4);
    lut(I1|I2|I3|I4, "MamEnIrac");
    Pin MamEnIrac = pin_l(PinOut);
    routem(MamEnIrac, MamAddr_EnIrac);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('A', "tag Memory Address Mux");

    nextsli(); nextsli();


// ------ program counter and incrementer section

    // generate addresses for fetching instructions - increment, skip, jump

    System.out.print("Program Counter: data " + pos());

    // data path 2:1-Mux with common select from jump 0,,E/MA or incr/skip PC+1
    //   program counter register in next instruction computation units FFs

    Pin ProgNext_SelIncr[] = new Pin[AddrBits],
        ProgNext_Old[] = new Pin[DataBits],
        ProgNext_Maddr[] = new Pin[DataBits],
        ProgNext_Cin[] = new Pin[DataBits];

    Pin ProgReg_ClkE[] = new Pin[AddrBits/2],
        ProgReg[] = new Pin[DataBits];

    // build the program counter data path, one LUT and its FF per address bit
    //   In1 = select increment, In2 = old PC bit, In3 = MA bit
    //   LUT passes old PC when In1=1 and MA when In1=0
    //     result is taken through the carry XOR and stored in the FF
    //   FF output is the PC register bit, fed back to In2 and sent to the Mam
    for (int Bit = AddrLSB; Bit >= AddrMSB; Bit--) {
      // use Bit-18, because Bit goes 35..18, not 17..0 as Java arrays go
      // because of conditional addition with multiplier-AND gate
      //   ProgNext_SelIncr=PinIn1 and increment=1 and ProgNext_Old=PinIn2
      ProgNext_SelIncr[Bit-18] = pin_l(PinIn1);
      ProgNext_Old[Bit] = pin_l(PinIn2);
      // MA input is not routed here, pin is only recorded for later wiring
      ProgNext_Maddr[Bit] = pin_l(PinIn3);
      lut(I1&I2|(~I1)&I3, "ProgNext"+Bit);
      if (Bit == AddrLSB) {
        // increment by carry so adders need no const 1 input, carry-in via BX
        slice(CarryBegin, CarryBegin_FromInBx);
        // ProgNext_Cin as array because of Java compiler bug
        //   aborts with "may not be initialised" error
        //     instead of just warning, and no way to suppress it
        ProgNext_Cin[Bit] = pin_l(PinInB); }
      lcell(CarryEnable, CarryEnable_Modify);
      // conditional adder, as only "skip" uses carry
      slice(CarryValue, CarryValue_In1AndIn2);
      // and use carry adder result
      lcell(OutFrom, OutFrom_LutXorCarry);
      slice(ClockFrom, SysClock);
      // one clock enable pin per slice, so take it on odd bits only
      //   (Bit-18)/2 indexes the AddrBits/2 wide array
      if (Bit%2 == 1) {
        ProgReg_ClkE[(Bit-18)/2] = pin_s(PinCe); }
      // set initial program counter to address of last read-in mem word
      //   bit DataLSB-Bit of RinStartAt gives the reset value of this PC bit
      // we are using GSR to reset, so select SR as source for right sense
      if ((RinStartAt & (1<<(DataLSB-Bit))) != 0) {
        lcell(FlipflopResetTo, FlipflopResetTo_GSR1InB0); }
      else {
        lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1); }
      // use sync reset, no docs on when to use, but likely intended better
      slice(FlipflopSyncreset, FlipflopSyncreset_On);
      ProgReg[Bit] = pin_l(PinOutQ);
      // feed PC back to its own next value LUT and to the address mux
      route(ProgReg[Bit], ProgNext_Old[Bit]);
      route(ProgReg[Bit], MamAddr_Prog[Bit]);
      nextlut(); }

    // jump the space for non-existent address bits
    for (int Bit = AddrMSB-1; Bit >= DataMSB; Bit--) {
      nextlut(); }

    System.out.print(", control " + pos());

    // #control PC-Mux=PC+1 =
    // #  insPC-Mux=PC+1|..|atestPC-Mux=PC+1|btestPC-Mux=PC+1|...
    // # this will grow when skip instructions are added
    //   default (non increment/skip) selection is to load/jump from MA
    // #carrygenerator PCcarry = PC-Mux=PC+1
    //   increment/SKIP: I+1, carry-in 1
    //   JUMP:           I+0, carry-in 0
    //   same logic function, no own LUT, just second name ProgSelIncr pin
    Pin ProgSelIncr_Irma = pin_l(PinIn1);
    Pin ProgSelIncr_Atest = pin_l(PinIn2);
    Pin ProgSelIncr_Btest = pin_l(PinIn3);
    lut(I1|I2|I3, "ProgSelIncr and ProgCin");
    Pin ProgSelIncr = pin_l(PinOut);
    routem(ProgSelIncr, ProgNext_SelIncr);
    Pin ProgCin = ProgSelIncr;
    route(ProgCin, ProgNext_Cin[AddrLSB]);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control ClkE-PC = insClkE-PC|..|atestClkE-PC|btestClkE-PC|...
    // # this will grow when skip or jump instructions are added
    Pin ProgClkE_Irma = pin_l(PinIn1);
    Pin ProgClkE_Atest = pin_l(PinIn2);
    Pin ProgClkE_Btest = pin_l(PinIn3);
    lut(I1|I2|I3, "ProgClkE");
    Pin ProgCLkE = pin_l(PinOut);
    routem(ProgCLkE, ProgReg_ClkE);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('P', "tag Program Counter");

    nextsli();


// ------ instruction and memory address register section

    // IR hold/modify the current instruction for later decoding it
    // MA hold/compute the current value of 0,,E/MA for addressing memory

    System.out.print("Instruction Register: data " + pos());

    // IR data path 2:1-Mux with complex enables for MD or IR or 0
    //   instruction register in instruction loader units FFs, bits 0..17
    // MA data path 2:1-Mux with complex enables for MD or MA+MD
    //   mem address register in instruction loader units FFs, bits 18..35

    Pin IrmaIns_SelIndex[] = new Pin[DataBits],
        IrmaIns_Old[] = new Pin[DataBits],
        IrmaIns_SelIndir[] = new Pin[DataBits],
        IrmaIns_Mem[] = new Pin[DataBits];

    Pin IrmaReg_ClkE[] = new Pin[DataBits/2],
        IrmaReg[] = new Pin[DataBits];

    // same get around JBits routing "feature" as with MdrmDataBus
    Pin IrmaInstrBus[][] = new Pin[DataBits][100];
    int IrmaInstrPos[] = new int[DataBits];

    // build the IR and MA data path, one LUT and its FF per data bit
    //   In1 = select index, In2 = old register bit, In3 = select indirect,
    //   In4 = memory data bit
    //   which LUT function is used depends on the instruction field of Bit
    //   no route is made in this loop, all targets are only collected
    //     in MdrmDataBus (memory data in) and IrmaInstrBus (register out)
    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // because of conditional addition with multiplier-AND gate
      //  IrmaIns_SelIndex=PinIn1 and index=1 and IrmaIns_Old=PinIn2
      IrmaIns_SelIndex[Bit] = pin_l(PinIn1);
      IrmaIns_Old[Bit] = pin_l(PinIn2);
      IrmaIns_SelIndir[Bit] = pin_l(PinIn3);
      IrmaIns_Mem[Bit] = pin_l(PinIn4);
      // add the memory data input to the targets of the Mdrm output bit
      MdrmDataBus[Bit][MdrmDataPos[Bit]++] = IrmaIns_Mem[Bit];
      // instruction format bit 0         1         2         3
      //                        012345678901234567890123456789012345
      //                        OOOOOOOOOAAAAIXXXXYYYYYYYYYYYYYYYYYY
      //                     IR.OP       AC  IX   Y=MA
      if (Bit >= 18) {
        // MA: insget/indir (I1=0) load mem data (I4)
        //     index (I1=1) add mem data (I4) to old (I2)
        lut((~I1)&I4|I1&(I2^I4), "IrmaIns MA"+Bit);
        if (Bit == DataLSB) {
          // index add carry-in=0, BX is 1 if not connected, so invert for Cin=0
          slice(CarryBegin, CarryBegin_FromInBx);
          lcell(InBInvert, InBInvert_On); }
        lcell(CarryEnable, CarryEnable_Modify);
        slice(CarryValue, CarryValue_In1AndIn2);
        // and use carry adder result
        lcell(OutFrom, OutFrom_LutXorCarry); }
      if (Bit <= 17 && Bit >= 14) {
        // IR.X: insget/indir (I1=0) load mem data (I4)
        //       index (I1=1) clear (set to zero)
        lut((~I1)&I4, "IrmaIns IR.X"+Bit); }
      if (Bit == 13) {
        // IR.I: insget/indir (I1=0) load mem data (I4)
        //       index (I1=1) keep old (I2)
        lut((~I1)&I4|I1&I2, "IrmaIns IR.I"+Bit); }
      if (Bit <= 12) {
        // IR.AC+IR.OP: insget (I1=0 AND I3=0) load mem data (I4)
        //              index/indir (I1=1 OR I3=1) keep old (I2)
        lut((~I1)&(~I3)&I4|(I1|I3)&I2, "IrmaIns IR.AC+IR.OP"+Bit); }
      slice(ClockFrom, SysClock);
      // one clock enable pin per slice, so take it on odd bits only
      if (Bit%2 == 1) {
        IrmaReg_ClkE[Bit/2] = pin_s(PinCe); }
      IrmaReg[Bit] = pin_l(PinOutQ);
      // wire MA and IR.X and IR.AC bits to their memory address mux inputs
      //   first target of every register bit is its own "old" input
      IrmaInstrBus[Bit][IrmaInstrPos[Bit]++] = IrmaIns_Old[Bit];
      // use the 3 address fields, IR.X and IR.AC fields shifted
      if (Bit >= 18) {
        // MA: wire to memory mux and PC jump load
        IrmaInstrBus[Bit][IrmaInstrPos[Bit]++] = MamAddr_Maddr[Bit];
        IrmaInstrBus[Bit][IrmaInstrPos[Bit]++] = ProgNext_Maddr[Bit]; }
      if (Bit <= 17 && Bit >= 14) {
        // IR.X: wire to memory mux for index register use
        //   bits 14..17 land on address mux bits 32..35
        IrmaInstrBus[Bit][IrmaInstrPos[Bit]++] = MamAddr_Iridx[Bit+18]; }
      if (Bit <= 12 && Bit >= 9) {
        // IR.AC: wire to memory mux for accumulator use
        //   bits 9..12 land on address mux bits 32..35
        IrmaInstrBus[Bit][IrmaInstrPos[Bit]++] = MamAddr_Irac[Bit+23]; }
      nextlut(); }

    System.out.print(", control " + pos());

    // central part of processor state logic diagram
    // comments with  // @  are for auto-generating pdp10.sld by the sld script
    // @in state insget (Instruction Get)
    // @  MAM-Mux=PC
    // @  IR.OP-Mux=IR.AC-Mux=IR.I-Mux=IR.X-Mux=MA-Mux=MD
    // @  ClkE-IR+MA=1
    // @  PC-Mux=PC+1
    // @  ClkE-PC=1
    // @  state=insexec
    // @in state insexec (Instruction Execute)
    // @  if IR.X
    // @    MAM-Mux=IR.X
    // @    IR.OP-Mux=IR.AC-Mux=IR.I-Mux=IR
    // @    IR.X-Mux=0
    // @    MA-Mux=MA+MD
    // @    ClkE-IR+MA=1
    // @    state=insexec
    // @  elseif IR.I
    // @    MAM-Mux=MA
    // @    IR.OP-Mux=IR.AC-Mux=IR
    // @    IR.I-Mux=IR.X-Mux=MA-Mux=MD
    // @    ClkE-IR+MA=1
    // @    state=insexec
    // @  elseif ... insdecode (Instruction Decode)
    // @    see the instruction unit sections for elseif continuations
    // @  else
    // @    unimplemented opcode, not tested for, FSM simply hangs
    // @    in the end all opcodes will be accounted for, until then avoid

    // central part of instruction execution Finite State Machine
    //   implemented as a "one hot" FSM, this has a 1 bit "hopping" around
    //     from one active state to the next, creating a "dance" of states
    //   one LUT and its FF per state is used to trigger and hold that state
    //     various auxiliary LUTs are used to detect conditions for these
    //   the LUTs implement "come from" logic to select when their state is due
    //     opposed to the processor state logic diagrams state= "go to" logic
    // #state insget = ...|atestinsget|logicinsget|hwordinsget|btestinsget|...
    // # this will grow with other instruction groups adding subparts
    //   if come from just finishing an instruction, any of 8 groups
    Pin IrmaInsget_Atest = pin_l(PinIn1);
    Pin IrmaInsget_Logic = pin_l(PinIn2);
    Pin IrmaInsget_Hword = pin_l(PinIn3);
    Pin IrmaInsget_Btest = pin_l(PinIn4);
    lut(I1|I2|I3|I4, "IrmaInsget");
    slice(ClockFrom, SysClock);
    // on reset set execution FSM bit to start-up in insget state
    //   all other FSM state bits are cleared on reset
    lcell(FlipflopResetTo, FlipflopResetTo_GSR1InB0);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    // use PinOutQ to use Flip-Flop as FSM bit for this state
    Pin IrmaInsget = pin_l(PinOutQ);
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #detect insacc = IR.AC.0|IR.AC.1|IR.AC.2|IR.AC.3
    //   detect if this is an accumulator !=0 instruction
    //   no use in decoder, but in mult instr units, for facultative AC storing
    Pin IrmaInsacc_Iracc9 = pin_l(PinIn1);
    IrmaInstrBus[9][IrmaInstrPos[9]++] = IrmaInsacc_Iracc9;
    Pin IrmaInsacc_Iracc10 = pin_l(PinIn2);
    IrmaInstrBus[10][IrmaInstrPos[10]++] = IrmaInsacc_Iracc10;
    Pin IrmaInsacc_Iracc11 = pin_l(PinIn3);
    IrmaInstrBus[11][IrmaInstrPos[11]++] = IrmaInsacc_Iracc11;
    Pin IrmaInsacc_Iracc12 = pin_l(PinIn4);
    IrmaInstrBus[12][IrmaInstrPos[12]++] = IrmaInsacc_Iracc12;
    lut(I1|I2|I3|I4, "IrmaInsacc");
    Pin IrmaInsacc = pin_l(PinOut);
    // same get around JBits routing "feature" as with MdrmDataBus
    Pin IrmaInsaccBus[] = new Pin[100];
    int IrmaInsaccPos = 0;
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #detect insindex = IR.X.0|IR.X.1|IR.X.2|IR.X.3
    //   detect if this is an indexed instruction, to be de-indexed
    Pin IrmaInsindex_Iridx14 = pin_l(PinIn1);
    IrmaInstrBus[14][IrmaInstrPos[14]++] = IrmaInsindex_Iridx14;
    Pin IrmaInsindex_Iridx15 = pin_l(PinIn2);
    IrmaInstrBus[15][IrmaInstrPos[15]++] = IrmaInsindex_Iridx15;
    Pin IrmaInsindex_Iridx16 = pin_l(PinIn3);
    IrmaInstrBus[16][IrmaInstrPos[16]++] = IrmaInsindex_Iridx16;
    Pin IrmaInsindex_Iridx17 = pin_l(PinIn4);
    IrmaInstrBus[17][IrmaInstrPos[17]++] = IrmaInsindex_Iridx17;
    lut(I1|I2|I3|I4, "IrmaInsindex");
    Pin IrmaInsindex = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #state insexec = insget|insexec&(insindex|IR.I)
    //   if come from insget or were insexec and do de-indexing or deref'ing
    Pin IrmaInsexec_Insget = pin_l(PinIn1);
    route(IrmaInsget, IrmaInsexec_Insget);
    Pin IrmaInsexec_Insexec = pin_l(PinIn2);
    Pin IrmaInsexec_Insidx = pin_l(PinIn3);
    route(IrmaInsindex, IrmaInsexec_Insidx);
    Pin IrmaInsexec_Irind13 = pin_l(PinIn4);
    IrmaInstrBus[13][IrmaInstrPos[13]++] = IrmaInsexec_Irind13;
    lut(I1|I2&(I3|I4), "IrmaInsexec");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin IrmaInsexec = pin_l(PinOutQ);
    route(IrmaInsexec, IrmaInsexec_Insexec);
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #detect insdecode = insexec&(~insindex)&(~IR.I)
    //   if in insexec and neither index nor indir, is ready to decode
    //   further decoding will be completed in appropriate instr units below
    Pin IrmaInsdecode_Exec = pin_l(PinIn1);
    route(IrmaInsexec, IrmaInsdecode_Exec);
    Pin IrmaInsdecode_Index = pin_l(PinIn2);
    route(IrmaInsindex, IrmaInsdecode_Index);
    Pin IrmaInsdecode_Irind13 = pin_l(PinIn3);
    IrmaInstrBus[13][IrmaInstrPos[13]++] = IrmaInsdecode_Irind13;
    lut(I1&(~I2)&(~I3), "IrmaInsdecode");
    Pin IrmaInsdecode = pin_l(PinOut);
    // same get around JBits routing "feature" as with MdrmDataBus
    Pin IrmaInsdecodeBus[] = new Pin[100];
    int IrmaInsdecodePos = 0;
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // this FSM is extended in various instruction unit sections
    //   general pattern is there:
    //     decode if instruction group, from insdecode and IR.OP.0 to 2
    //     decode if subinstructions of group, from above and IR.OP.3 to 8
    //     drive further states from these decoders and IR.OP.3 to 8
    //     at end a state subpart of insget to get control to come back

    // #control IR-MuxselIdx = insindex
    //   only 1 input, but LUT to buffer insindex to reduce fan-out
    //     gives faster instruction decode, slower mux switching irrelevant
    Pin IrmaSelIndex_Insidx = pin_l(PinIn1);
    route(IrmaInsindex, IrmaSelIndex_Insidx);
    lut(I1, "IrmaSelIndex");
    Pin IrmaSelIndex = pin_l(PinOut);
    routem(IrmaSelIndex, IrmaIns_SelIndex);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control IR-MuxselInd = (~insindex)&IR.I
    Pin IrmaSelIndir_Insidx = pin_l(PinIn1);
    route(IrmaInsindex, IrmaSelIndir_Insidx);
    Pin IrmaSelIndir_Irind13 = pin_l(PinIn2);
    IrmaInstrBus[13][IrmaInstrPos[13]++] = IrmaSelIndir_Irind13;
    lut((~I1)&I2, "IrmaSelIndir");
    Pin IrmaSelIndir = pin_l(PinOut);
    routem(IrmaSelIndir, IrmaIns_SelIndir);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control ClkE-IR+MA = insget|insexec&(insindex|IR.I)
    Pin IrmaClkE_Insget = pin_l(PinIn1);
    route(IrmaInsget, IrmaClkE_Insget);
    Pin IrmaClkE_Insexec = pin_l(PinIn2);
    route(IrmaInsexec, IrmaClkE_Insexec);
    Pin IrmaClkE_Insidx = pin_l(PinIn3);
    route(IrmaInsindex, IrmaClkE_Insidx);
    Pin IrmaClkE_Irind13 = pin_l(PinIn4);
    IrmaInstrBus[13][IrmaInstrPos[13]++] = IrmaClkE_Irind13;
    lut(I1|I2&(I3|I4), "IrmaClkE");
    Pin IrmaClkE = pin_l(PinOut);
    routem(IrmaClkE, IrmaReg_ClkE);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #control subpart insMAM-Mux=PC = insget
    //   only 1 input, no LUT, route 1 input direct to target
    route(IrmaInsget, MamEnProg_Irma);
    // commented out nextlut(), here despite no effect, for visual consistency
    //nextlut();
    // #control subpart insMAM-Mux=IR.X = insexec&insindex
    Pin IrmaMamEnIridx_Insexec = pin_l(PinIn1);
    route(IrmaInsexec, IrmaMamEnIridx_Insexec);
    Pin IrmaMamEnIridx_Insidx = pin_l(PinIn2);
    route(IrmaInsindex, IrmaMamEnIridx_Insidx);
    lut(I1&I2, "IrmaMamEnIdx");
    Pin IrmaMamEnIridx = pin_l(PinOut);
    route(IrmaMamEnIridx, MamEnIridx_Irma);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart insMAM-Mux=MA = insexec&(~insindex)&IR.I
    Pin IrmaMamEnMaddr_Insexec = pin_l(PinIn1);
    route(IrmaInsexec, IrmaMamEnMaddr_Insexec);
    Pin IrmaMamEnMaddr_Insidx = pin_l(PinIn2);
    route(IrmaInsindex, IrmaMamEnMaddr_Insidx);
    Pin IrmaMamEnMaddr_Irind13 = pin_l(PinIn3);
    IrmaInstrBus[13][IrmaInstrPos[13]++] = IrmaMamEnMaddr_Irind13;
    lut(I1&(~I2)&I3, "IrmaMamEnMaddr");
    Pin IrmaMamEnMaddr = pin_l(PinOut);
    route(IrmaMamEnMaddr, MamEnMaddr_Irma);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart insPC-Mux=PC+1 = insget
    //   only 1 input, no LUT, route 1 input direct to target
    route(IrmaInsget, ProgSelIncr_Irma);
    // commented out nextlut(), here despite no effect, for visual consistency
    //nextlut();
    // #control subpart insClkE-PC = insget
    route(IrmaInsget, ProgClkE_Irma);
    //nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('I', "tag Instruction Register");

    nextsli();


// ------ arithmetic register section

    // hold the current data being operated on (at least for one step)
    // in the real PDP-10s the AR is used for the entire calculation
    //   here it is only used for temporary holding of C(E) or C(AC) inputs
    // this is because all instruction units have their own work/result "AR"s
    // that is done so to avoid a large (ca 16:1) Mux, expensive in FPGAs
    //   also gets rid of lots of wiring back and forward, less delay, faster

    System.out.print("Arithmetic Register: data " + pos());

    // data path AR register in FFs, no LUT logic as input direct from memory

    Pin ArithReg_Mem[] = new Pin[DataBits],
        ArithReg_ClkE[] = new Pin[DataBits/2],
        ArithReg[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      ArithReg_Mem[Bit] = pin_l(PinIn1);
      MdrmDataBus[Bit][MdrmDataPos[Bit]++] = ArithReg_Mem[Bit];
      lut(I1, "ArithReg"+Bit);
      slice(ClockFrom, SysClock);
      if (Bit%2 == 1) {
        ArithReg_ClkE[Bit/2] = pin_s(PinCe); }
      ArithReg[Bit] = pin_l(PinOutQ);
      nextlut(); }

    System.out.print(", control " + pos());

    // #control ClkE-AR =
    // #  ...|atestClkE-AR|logicClkE-AR|hwordClkE-AR|btestClkE-AR|...
    // # this will grow with other instruction groups adding subparts 
    Pin ArithClkE_Atest = pin_l(PinIn1);
    Pin ArithClkE_Logic = pin_l(PinIn2);
    Pin ArithClkE_Hword = pin_l(PinIn3);
    Pin ArithClkE_Btest = pin_l(PinIn4);
    lut(I1|I2|I3|I4, "ArithClkE");
    Pin ArithClkE = pin_l(PinOut);
    routem(ArithClkE, ArithReg_ClkE);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('A', "tag Arithmetic Register");

    nextsli();


// ------ immediate mode mux section

    // switch an input between, C(E|AC)/0,,E = AR/MA contents

    System.out.print("Immediate Mux: data " + pos());

    // data path 2:1-Mux with common select line, in from MA or AR registers
    // LUT and no FFs, Arith uses FFs and no LUT, they could be merged

    Pin ImmedData_Arith[] = new Pin[DataBits],
        ImmedData_Maddr[] = new Pin[DataBits],
        ImmedData_SelMaddr[] = new Pin[DataBits],
        ImmedData[] = new Pin[DataBits];

    // same get around JBits routing "feature" as with MdrmDataBus
    Pin ImmedDataBus[][] = new Pin[DataBits][100];
    int ImmedDataPos[] = new int[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      ImmedData_Arith[Bit] = pin_l(PinIn1);
      route(ArithReg[Bit], ImmedData_Arith[Bit]);
      if (Bit >= 18) {
        // width of MA: select between MA and AR
        ImmedData_Maddr[Bit] = pin_l(PinIn2);
        IrmaInstrBus[Bit][IrmaInstrPos[Bit]++] = ImmedData_Maddr[Bit]; }
      ImmedData_SelMaddr[Bit] = pin_l(PinIn3);
      if (Bit >= 18) {
        // width of MA: classic 2:1-Mux only for E width of bits
        lut(I1&(~I3)|I2&I3, "ImmedDataLow"+Bit); }
      else {
        // rest of bits: just gate AR, zero (not MA) when 0,,E selected
        lut(I1&(~I3), "ImmedDataHigh"+Bit); }
      ImmedData[Bit] = pin_l(PinOut);
      if (DebugDisp == 1) {
        // show what data Immed selected for last operation
        slice(ClockFrom, SysClock); }
      nextlut(); }

    System.out.print(", control " + pos());

    // #control Imm-Mux=E =
    // #  ...|atestImm-Mux=E|logicImm-Mux=E|hwordImm-Mux=E|btestImm-Mux=E|...
    // # this will grow with other instruction groups adding subparts
    Pin ImmedSelMaddr_Atest = pin_l(PinIn1);
    Pin ImmedSelMaddr_Logic = pin_l(PinIn2);
    Pin ImmedSelMaddr_Hword = pin_l(PinIn3);
    Pin ImmedSelMaddr_Btest = pin_l(PinIn4);
    lut(I1|I2|I3|I4, "ImmedSelMaddr");
    Pin ImmedSelMaddr = pin_l(PinOut);
    routem(ImmedSelMaddr, ImmedData_SelMaddr);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('I', "tag Immediate Mux");

    nextsli();


// ------ memory data write mux section

    // select which instruction group unit can write back to memory
    //   allows one "AR" per instr group, if more "AR"s then use a local Mux
    // select when writeback to memory happens, one OR input per instr group

    // uses F6-Muxes, needs fitting 1/0 slice pairs, so align to 1 slice
    //   must do this before printing position, as it changes the position
    alignsli1();

    System.out.print("Memory Data Write Mux: data " + pos());

    // data path 8:1-Mux with 3 common select lines in from 8 instr group units
    //   done as 2:1-Mux (F6-Mux) of 2 2:1-Mux (F5-Muxes) of 4 2:1-Muxes (LUTs)
    // directly drive from IR.0-2, so no decoders in instruction units needed
    //   alternative would be 8:1-Mux w 8 separ enables, from 8 units decoders

    // rename MdwmData_0/1/2/7 when their instruction units are named
    Pin MdwmData_0[] = new Pin[DataBits],
        MdwmData_1[] = new Pin[DataBits],
        MdwmData_2[] = new Pin[DataBits],
        MdwmData_Atest[] = new Pin[DataBits],
        MdwmData_Logic[] = new Pin[DataBits],
        MdwmData_Hword[] = new Pin[DataBits],
        MdwmData_Btest[] = new Pin[DataBits],
        MdwmData_7[] = new Pin[DataBits],
        MdwmData_Sel1Sl0F[] = new Pin[DataBits],
        MdwmData_Sel1Sl0G[] = new Pin[DataBits],
        MdwmData_Sel1Sl1F[] = new Pin[DataBits],
        MdwmData_Sel1Sl1G[] = new Pin[DataBits],
        MdwmData_Sel2Sl0[] = new Pin[DataBits],
        MdwmData_Sel2Sl1[] = new Pin[DataBits],
        MdwmData_Sel4[] = new Pin[DataBits],
        MdwmData[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // use F5-Mux, needs fitting F/G LUT pair, so align to F LUT
      alignlutf();
      MdwmData_0[Bit] = pin_l(PinIn1);
      MdwmData_1[Bit] = pin_l(PinIn2);
      MdwmData_Sel1Sl0F[Bit] = pin_l(PinIn3);
      lut(I1&(~I3)|I2&I3, "MdwmData"+Bit+".LF");
      MdwmData_Sel2Sl0[Bit] = pin_l(PinInB);
      // invert BX, because F5-Mux is 0/1:G/F (!)
      lcell(InBInvert, InBInvert_On);
      secondlut();
      MdwmData_2[Bit] = pin_l(PinIn1);
      MdwmData_Atest[Bit] = pin_l(PinIn2);
      MdwmData_Sel1Sl0G[Bit] = pin_l(PinIn3);
      lut(I1&(~I3)|I2&I3, "MdwmData"+Bit+".LG");
      secondsli();
      MdwmData_Logic[Bit] = pin_l(PinIn1);
      MdwmData_Hword[Bit] = pin_l(PinIn2);
      MdwmData_Sel1Sl1F[Bit] = pin_l(PinIn3);
      lut(I1&(~I3)|I2&I3, "MdwmData"+Bit+".RF");
      MdwmData_Sel2Sl1[Bit] = pin_l(PinInB);
      // invert BX, because F5-Mux is 0/1:G/F (!)
      lcell(InBInvert, InBInvert_On);
      secondlut();
      MdwmData_Btest[Bit] = pin_l(PinIn1);
      MdwmData_7[Bit] = pin_l(PinIn2);
      MdwmData_Sel1Sl1G[Bit] = pin_l(PinIn3);
      lut(I1&(~I3)|I2&I3, "MdwmData"+Bit+".RG");
      // output from an Y, so take it from last LUT, not out of 2nd
      MdwmData_Sel4[Bit] = pin_l(PinInB);
      // invert BY, because F6-Mux is 0/1:Sl1/Sl0 (!)
      lcell(InBInvert, InBInvert_On);
      // set Y output to use F5 and F6 Muxes for full 8:1 Mux
      lcell(OutFrom, OutFrom_F56Mux);
      MdwmData[Bit] = pin_l(PinOut);
      route(MdwmData[Bit], MemData_Mdwm[Bit]);
      route(MdwmData[Bit], FmemData_Mdwm[Bit]);
      if (DebugDisp == 1) {
        // show what data Mdwm sent to Mem for last write
        // only Y relevant, X has 2:1-Mux of groups 0 and 1
        slice(ClockFrom, SysClock); }
      // 4 LUTs per bit, requires 2 CLBs, zigzag placing of bits like 2 slice
      //   bit 35/33/../1 in first CLB, bit 34/32/../0 in second CLB
      // zigzag placing of bit pairs 35/34, 33/32, .., 1/0
      nextzigzagclb(Bit%2 == 1); }

    System.out.print(", control " + pos());

    // #buffer mdwm-Sel1 = IR.OP.2
    //   no logic, just buffer IR bits to reduce fan-out of IR.OP outputs
    //     gives faster instruction decode, slower mux switching irrelevant
    Pin MdwmSel1_Irop2 = pin_l(PinIn1);
    IrmaInstrBus[2][IrmaInstrPos[2]++] = MdwmSel1_Irop2;
    lut(I1, "MdwmSel1");
    Pin MdwmSel1 = pin_l(PinOut);
    routem(MdwmSel1, MdwmData_Sel1Sl0F);
    routem(MdwmSel1, MdwmData_Sel1Sl1F);
    routem(MdwmSel1, MdwmData_Sel1Sl0G);
    routem(MdwmSel1, MdwmData_Sel1Sl1G);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer mdwm-Sel2 = IR.OP.1
    Pin MdwmSel2_Irop1 = pin_l(PinIn1);
    IrmaInstrBus[1][IrmaInstrPos[1]++] = MdwmSel2_Irop1;
    lut(I1, "MdwmSel2");
    Pin MdwmSel2 = pin_l(PinOut);
    routem(MdwmSel2, MdwmData_Sel2Sl0);
    routem(MdwmSel2, MdwmData_Sel2Sl1);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer mdwm-Sel4 = IR.OP.0
    Pin MdwmSel4_Irop0 = pin_l(PinIn1);
    IrmaInstrBus[0][IrmaInstrPos[0]++] = MdwmSel4_Irop0;
    lut(I1, "MdwmSel4");
    Pin MdwmSel4 = pin_l(PinOut);
    routem(MdwmSel4, MdwmData_Sel4);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #control mdwmClkE-Mem =
    // #  ...|atestClkE-Mem|logicClkE-Mem|hwordClkE-Mem|btestClkE-Mem|...
    // # this will grow with other instruction groups adding subparts
    Pin MdwmMemClkE_Atest = pin_l(PinIn1);
    Pin MdwmMemClkE_Logic = pin_l(PinIn2);
    Pin MdwmMemClkE_Hword = pin_l(PinIn3);
    Pin MdwmMemClkE_Btest = pin_l(PinIn4);
    lut(I1|I2|I3|I4, "MdwmMemClkE");
    Pin MdwmMemClkE = pin_l(PinOut);
    // wire to both normal and fast memory, they select which is written to
    route(MdwmMemClkE, MemClkE_MdwmMemClkE);
    route(MdwmMemClkE, FmemClkE_MdwmMemClkE);
    if (DebugDisp == 1) {
      // show whether this control is triggering what it controls
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('W', "tag Memory Data Write Mux");

    // we used 2 CLBs because of zigzagging, this and next, so step 4 slices
    nextsli(); nextsli(); nextsli(); nextsli();


// ------ 000mooooo unimplemented user operations instruction unit section

    // implement the 000mooooo unimplemented user operations
    //   2 types of UUOs with each 32 unused subtypes

    // m UUO mode, from PDP-10 Reference Manual, page 2-123 to 2-127
    // Mnemonic Instruction       IR m 3
    // LUUOn    Local Unimp User Op    0
    // MUUOn    Monitor Unimp User Op  1

    // LUUOs in user mode do IR->C(40), C(41)->PC
    //   wait until memory is large enough for 040 to exist
    // MUUOs in user require mode switching to monitor
    //   and all UUOs in monitor mode require memory management
    //   wait until memory management and monitor mode is implemented
    //   this is different on each processor model

    // --- nothing implemented yet


// ------ 001xxxxxx double, byte, floating point instruction unit section

    // implement the 001xxxxxx double, byte, floating point instructions
    //               ...000x0x (unimp before KL-10 UJEN/101/JSYS/ADJSP)
    //               ...000x1x (unimp before KL-10 GFAD/GFSB/GFMP/GFDV)
    //               ...0010xx (unimp before KI-10 DFAD/DFSB/DFMP/DFDV)
    //               ...0011xx (unimp before KL-10 DADD/DSUB/DMUL/DDIV)
    //               ...010x0x (unimp before KI-10 DMOVE/DMOVN/DMOVM/DMOVNM)
    //               ...010x1x (un b KI-10 FIX/FIXR/FLTR) (un b KL-10 EXTEND)
    //               ...011xxx (KA F UFA/DFN/FSC) (KA B IBP/ILDB/LDB/IDPB/DPB)
    //               ...1ff0mm (unimp KA-10 w/o FPU FAD/FSB/FMP/FDV)
    //               ...1ff001 (unimp KA-10 w/o FPU FADL/FSBL/FMPL/FDVL)
    //               ...1ff1mm (unimp KA-10 w/o FPU FADR/FSBR/FMPR/FDVR)
    // multiple instruction formats will require multiple subsections

    // --- nothing implemented yet
    // are the most complicated instructions, and least often used
    //   will be implemented last, possibly even after monitor mode and IO


// ------ 010xxxxxx move, fixed point, subroutine instruction unit section

    // implement the 010xxxxxx move, fixed point, subroutine instructions
    //               ...00ccmm MOVE/MOVS/MOVN/MOVM
    //               ...01oimm IMUL/MUL/IDIV/DIV
    //               ...100xxx ASH(C)/ROT(C)/LSH(C) and JFFO and 247
    //               ...101xxx EXCH/BLT/AOBJP/AOBJN/JRST/JFCL/XCT/MAP
    //               ...1100xx PUSHJ/PUSH/POP/POJ
    //               ...1101xx JSR/JSP/JSA/JRA
    //               ...111omm ADD/SUB
    // multiple instruction formats will require multiple subsections

    // --- nothing implemented yet
    // mainly simple instructions, but subsectioning will make it large
    //   will be implemented before 000/111/001, but after 110/101


// ------ 011tttmmm arithmetic testing instruction unit section

    // implement the 011tttmmm arithmetic testing instructions
    //   8 operand/test combinations with each 8 skip/jump condition modes

    // ttt test type, from PDP-10 Reference Manual, page 2-43 to 2-46
    // Mnemonic  Instruction  IR ttt 3 4 5  Arithmetic    Action
    // CAI[m]    Comp AC Immed Skip  0 0 0  C(AC) - 0,,E  if doit PC=PC+1
    // CAM[m]    Comp AC Mem Skip    0 0 1  C(AC) - C(E)  if doit PC=PC+1
    // JUMP[m]   Comp AC Zero Jump   0 1 0  C(AC) - 0     if doit PC=E
    // SKIP[m]   Comp Mem Zero Skip  0 1 1  C(E) - 0      if doit PC=PC+1
    //                                                    if AC#0 C(AC)=C(E)
    // AOJ[m]    Add One AC Jump     1 0 0  C(AC)=C(AC)+1 if doit PC=E
    // AOS[m]    Add One Mem Skip    1 0 1  C(E) =C(E)+1  if doit PC=PC+1
    //                                                    if AC#0 C(AC)=C(E)
    // SOJ[m]    Sub One AC Jump     1 1 0  C(AC)=C(AC)-1 if doit PC=E
    // SOS[m]    Sub One Mem Skip    1 1 1  C(E) =C(E)-1  if doit PC=PC+1
    //                                                    if AC#0 C(AC)=C(E)

    // mmm condition mode, from PDP-10 Reference Manual, page 2-42
    // Suffix  Condition  IR mmm 6 7 8  doit on
    // -       Never             0 0 0  nothing
    // L       Lower             0 0 1  negative (bit 0 = 1)
    // E       Equal             0 1 0  zero (OR of data bits 0..35 = 0)
    // LE      Lower or Equal    0 1 1  negative OR zero
    // A       Always            1 0 0  inverse of -
    // GE      Greater or Equal  1 0 1  inverse of L
    // N       Not Equal         1 1 0  inverse of E
    // G       Greater           1 1 1  inverse of LE
    // IR.6 is invert test, IR.7 is test zero, IR.8 is test negative

    // AO*/SO* set flags, 0 <-> -1 Carry0 and Carry1
    //                            max <-> min Trap1 Overflow and Carry0 or C1
    // see flags discussion from PDP-10 Reference Manual, page 2-11 and 2-65
    // to do: no flags implemented yet, as only visible in PC store and JFCL
    //   wait until subroutine and JFCL stuff implemented


    // first step, subtract/pass/incr/decr computation unit

    System.out.print("Arithmetic Test, Compute: data " + pos());

    // data path subtract/pass/incr/decr comp unit with 2 function select lines
    //   wide-NOR to detect zero/non-zero, positive/negative is bit0
    //   AR register (for SKIP/AO*/SO* writeback) in computation units FFs

    Pin AtestComp_Mem[] = new Pin[DataBits],
        AtestComp_Imm[] = new Pin[DataBits],
        AtestComp_SelFunc1[] = new Pin[DataBits],
        AtestComp_SelFunc2[] = new Pin[DataBits],
        AtestComp_Cin[] = new Pin[DataBits],
        AtestComp[] = new Pin[DataBits],
        AtestComp_Neg[] = new Pin[DataBits];

    Pin AtestReg_ClkE[] = new Pin[DataBits/2],
        AtestReg[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // C(AC) or C(E) direct as comparison minuend (I1 of I1-I2)
      AtestComp_Mem[Bit] = pin_l(PinIn1);
      MdrmDataBus[Bit][MdrmDataPos[Bit]++] = AtestComp_Mem[Bit];
      // C(E) or 0,,E from Immed as comparison subtractor
      AtestComp_Imm[Bit] = pin_l(PinIn2);
      ImmedDataBus[Bit][ImmedDataPos[Bit]++] = AtestComp_Imm[Bit];
      // select compare function: CA* I1-I2, JUMP/SKIP I1-0, AO* I1+1, SO* I1-1
      AtestComp_SelFunc1[Bit] = pin_l(PinIn3);
      AtestComp_SelFunc2[Bit] = pin_l(PinIn4);
      // compute: CA*:       I1-I2 = I1+(-I2)
      //          JUMP/SKIP: I1-0 = I1
      //          AO*:       I1+1, use carry = 1 for +1, so I1+0 = I1
      //          SO*:       I1-1 = I1+(-1), -1 is all ones, XOR 1 = not
      lut((I1^(~I2)) &(~I3)&(~I4)|
           I1        &  I3 &(~I4)|
           I1        &(~I3)&  I4 |
          (~I1)      &  I3 &  I4 , "AtestComp"+Bit);
      if (Bit == DataLSB) {
        // increment by carry, carry/borrow-in via BX from carry generator
        slice(CarryBegin, CarryBegin_FromInBx);
        // AtestComp_Cin as array because of Java compiler bug
        //   aborts with "may not be initialised" error
        //     instead of just warning, and no way to suppress it
        AtestComp_Cin[Bit] = pin_l(PinInB); }
      lcell(CarryEnable, CarryEnable_Modify);
      slice(CarryValue, CarryValue_In1);
      // and use carry adder result
      lcell(OutFrom, OutFrom_LutXorCarry);
      AtestComp[Bit] = pin_l(PinOut);
      if (Bit == DataMSB) {
        // and directly use non-registered MSB for negative test result
        // AtestComp_Neg as array because of Java compiler bug
        //   aborts with "may not be initialised" error
        //     instead of just warning, and no way to suppress it
        AtestComp_Neg[Bit] = AtestComp[Bit]; }
      // atest units arithmetic register for AO* and SO*, to then write back
      slice(ClockFrom, SysClock);
      if (Bit%2 == 1) {
        AtestReg_ClkE[Bit/2] = pin_s(PinCe); }
      AtestReg[Bit] = pin_l(PinOutQ);
      route(AtestReg[Bit], MdwmData_Atest[Bit]);
      nextlut(); }

    System.out.print(", control " + pos());

    // arithmetic testing unit extension of processor state logic diagram
    // @in state insexec (Instruction Execute), continued from Irma insexec
    // @  elseif IR.OP=011000mmm atestcai (Atest Compare Immediate)
    // @    MAM-Mux=IR.AC               # 1st operand C(AC)
    // @    Imm-Mux=E                   # 2nd operand 0,,E
    // @    PC-Mux=PC+1                 # skip
    // @    if doit                     # apply "tt:sub" and "mmm" test
    // @      ClkE-PC=1
    // @    state=insget                # we are done
    // @  elseif IR.OP=011001mmm atestcam (Atest Compare Memory)
    // @    MAM-Mux=MA                  # 2nd operand C(MA)
    // @    ClkE-AR=1                   # store to AR
    // @    state=atestseccam           # get second operand and test
    // @  elseif IR.OP=011tt0mmm atestjump (Atest Zero/AddOne/SubOne and Jump)
    // @    MAM-Mux=IR.AC               # 1st operand C(AC)
    // @    PC-Mux=MA                   # jump to 0,,E
    // @    if doit                     # apply "tt" and "mmm" test
    // @      ClkE-PC=1
    // @    if IR.OP=0111t0mmm          # AOJ/SOJ
    // @      ClkE-atestAR=1            # store to atestAR
    // @      state=atestwracc          # and write back to C(AC)
    // @    else
    // @      state=insget              # we are done
    // @  elseif IR.OP=011tt1mmm atestskip (Atest Zero/AddOne/SubOne and Skip)
    // @    MAM-Mux=MA                  # 1st operand C(MA)
    // @    PC-Mux=PC+1                 # skip
    // @    if doit                     # apply "tt" and "mmm" test
    // @      ClkE-PC=1
    // @    if IR.OP=0111t1mmm          # AOS/SOS
    // @      ClkE-atestAR=1            # store to atestAR
    // @      state=atestwrmem          # write back to C(MA)
    // @    if IR.AC                    # with write to C(AC)
    // @      ClkE-atestAR=1            # store "second" copy to atestAR
    // @      state=atestwracc          # write also to C(AC)
    // @    else
    // @      state=insget              # we are done
    // @in state atestseccam (Atest Compare Acc Mem second fetch)
    // @  MAM-Mux=IR.AC                 # 1st operand C(AC)
    // @  Imm-Mux=AR                    # 2nd operand from AR
    // @  PC-Mux=PC+1                   # skip
    // @  if doit                       # apply "tt:sub" and "mmm" test
    // @    ClkE-PC=1
    // @  state=insget                  # we are done
    // @in state atestwracc (Atest Write Result to Accumulator)
    // @  MAM-Mux=IR.AC
    // @  MD-Mux=atest
    // @  ClkE-Mem=1
    // @  state=insget                  # we are done
    // @in state atestwrmem (Atest Write Result to Memory)
    // @  MAM-Mux=IR.MA
    // @  MD-Mux=atest
    // @  ClkE-Mem=1
    // @  if IR.AC                      # with write to C(AC)
    // @    state=atestwracc            # write also to C(AC)
    // @  else                          # was AOS/SOS with AC=0, done
    // @    state=insget                # we are done

    // arithmetic testing unit extension of instruction execution FSM
    // #decode atestinstr = insdecode&(~IR.OP.0)&IR.OP.1&IR.OP.2
    //   decode if start of an 011tttmmm arithmetic testing instruction
    Pin AtestInstr_Insdecode = pin_l(PinIn1);
    IrmaInsdecodeBus[IrmaInsdecodePos++] = AtestInstr_Insdecode;
    Pin AtestInstr_Irop0 = pin_l(PinIn2);
    IrmaInstrBus[0][IrmaInstrPos[0]++] = AtestInstr_Irop0;
    Pin AtestInstr_Irop1 = pin_l(PinIn3);
    IrmaInstrBus[1][IrmaInstrPos[1]++] = AtestInstr_Irop1;
    Pin AtestInstr_Irop2 = pin_l(PinIn4);
    IrmaInstrBus[2][IrmaInstrPos[2]++] = AtestInstr_Irop2;
    lut(I1&(~I2)&I3&I4, "AtestInstr");
    Pin AtestInstr = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #decode atestcai = atestinstr&(~IR.OP.3)&(~IR.OP.4)&(~IR.OP.5)
    //   decode if start of a compare immediate arith testing instruction
    Pin AtestCai_Instr = pin_l(PinIn1);
    route(AtestInstr, AtestCai_Instr);
    Pin AtestCai_Irop3 = pin_l(PinIn2);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestCai_Irop3;
    Pin AtestCai_Irop4 = pin_l(PinIn3);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = AtestCai_Irop4;
    Pin AtestCai_Irop5 = pin_l(PinIn4);
    IrmaInstrBus[5][IrmaInstrPos[5]++] = AtestCai_Irop5;
    lut(I1&(~I2)&(~I3)&(~I4), "AtestCai");
    Pin AtestCai = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #decode atestcam = atestinstr&(~IR.OP.3)&(~IR.OP.4)&IR.OP.5
    //   decode if start of a compare memory arith testing instruction
    // #state atestseccam = atestcam
    //   if come from atestcam, always fetch second operand
    //     same logic function, no own LUT, just add FF and OutQ Pin
    Pin AtestCam_Instr = pin_l(PinIn1);
    route(AtestInstr, AtestCam_Instr);
    Pin AtestCam_Irop3 = pin_l(PinIn2);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestCam_Irop3;
    Pin AtestCam_Irop4 = pin_l(PinIn3);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = AtestCam_Irop4;
    Pin AtestCam_Irop5 = pin_l(PinIn4);
    IrmaInstrBus[5][IrmaInstrPos[5]++] = AtestCam_Irop5;
    lut(I1&(~I2)&(~I3)&I4, "AtestCam and AtestSeccam");
    Pin AtestCam = pin_l(PinOut);
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin AtestSeccam = pin_l(PinOutQ);
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #decode atestjump = atestinstr&(IR.OP.3|IR.OP.4)&(~IR.OP.5)
    //   decode if start of a jump on zero/add/sub arith testing instruction
    Pin AtestJump_Instr = pin_l(PinIn1);
    route(AtestInstr, AtestJump_Instr);
    Pin AtestJump_Irop3 = pin_l(PinIn2);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestJump_Irop3;
    Pin AtestJump_Irop4 = pin_l(PinIn3);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = AtestJump_Irop4;
    Pin AtestJump_Irop5 = pin_l(PinIn4);
    IrmaInstrBus[5][IrmaInstrPos[5]++] = AtestJump_Irop5;
    lut(I1&(I2|I3)&(~I4), "AtestJump");
    Pin AtestJump = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #decode atestskip = atestinstr&(IR.OP.3|IR.OP.4)&IR.OP.5
    //   decode if start of a skip on zero/add/sub arith testing instruction
    Pin AtestSkip_Instr = pin_l(PinIn1);
    route(AtestInstr, AtestSkip_Instr);
    Pin AtestSkip_Irop3 = pin_l(PinIn2);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestSkip_Irop3;
    Pin AtestSkip_Irop4 = pin_l(PinIn3);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = AtestSkip_Irop4;
    Pin AtestSkip_Irop5 = pin_l(PinIn4);
    IrmaInstrBus[5][IrmaInstrPos[5]++] = AtestSkip_Irop5;
    lut(I1&(I2|I3)&I4, "AtestSkip");
    Pin AtestSkip = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #state atestwrmem = atestskip&IR.OP.3
    //   if AOS/SOS write to memory
    Pin AtestWrmem_Skip = pin_l(PinIn1);
    route(AtestSkip, AtestWrmem_Skip);
    Pin AtestWrmem_Irop3 = pin_l(PinIn2);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestWrmem_Irop3;
    lut(I1&I2, "AtestWrmem");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin AtestWrmem = pin_l(PinOutQ);
    nextlut();
    // #state atestwracc = atestjump&IR.OP.3|atestskip&(~IR.OP.3)&insacc|
    // #  atestwrmem&insacc
    //   if AOJ/SOJ or SKIP with AC!=0 or AOS/SOS with AC!=0 after write memory
    Pin AtestWracc1_Jump = pin_l(PinIn1);
    route(AtestJump, AtestWracc1_Jump);
    Pin AtestWracc1_Irop3 = pin_l(PinIn2);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestWracc1_Irop3;
    Pin AtestWracc1_Skip = pin_l(PinIn3);
    route(AtestSkip, AtestWracc1_Skip);
    Pin AtestWracc1_Insacc = pin_l(PinIn4);
    IrmaInsaccBus[IrmaInsaccPos++] = AtestWracc1_Insacc;
    lut(I1&I2|I3&(~I2)&I4, "AtestWracc1");
    Pin AtestWracc1 = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    Pin AtestWracc_Wracc1 = pin_l(PinIn1);
    route(AtestWracc1, AtestWracc_Wracc1);
    Pin AtestWracc_Wrmem = pin_l(PinIn2);
    route(AtestWrmem, AtestWracc_Wrmem);
    Pin AtestWracc_Insacc = pin_l(PinIn3);
    IrmaInsaccBus[IrmaInsaccPos++] = AtestWracc_Insacc;
    lut(I1|I2&I3, "AtestWracc");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin AtestWracc = pin_l(PinOutQ);
    nextlut();
    // #state subpart atestinsget = atestcai|atestcam2|atestjump&(~IR.OP.3)|
    // #  atestskip&(~IR.OP.3)&(~insacc)|atestwracc|
    // #  atestwrmem&(~IR.OP.3)
    //   trigger next instr when done with CAI or CAM second or JUMP or
    //     SKIP and AC=0 or written acc, or written mem and not AOS/SOS
    Pin AtestInsget1_Cai = pin_l(PinIn1);
    route(AtestCai, AtestInsget1_Cai);
    Pin AtestInsget1_Seccam = pin_l(PinIn2);
    route(AtestSeccam, AtestInsget1_Seccam);
    Pin AtestInsget1_Jump = pin_l(PinIn3);
    route(AtestJump, AtestInsget1_Jump);
    Pin AtestInsget1_Irop3 = pin_l(PinIn4);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestInsget1_Irop3;
    lut(I1|I2|I3&(~I4), "AtestInsget1");
    Pin AtestInsget1 = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    Pin AtestInsget2_Skip = pin_l(PinIn1);
    route(AtestSkip, AtestInsget2_Skip);
    Pin AtestInsget2_Irop3 = pin_l(PinIn2);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestInsget2_Irop3;
    Pin AtestInsget2_Insacc = pin_l(PinIn3);
    IrmaInsaccBus[IrmaInsaccPos++] = AtestInsget2_Insacc;
    Pin AtestInsget2_Wracc = pin_l(PinIn4);
    route(AtestWracc, AtestInsget2_Wracc);
    lut(I1&(~I2)&(~I3)|I4, "AtestInsget2");
    Pin AtestInsget2 = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    Pin AtestInsget_Insget1 = pin_l(PinIn1);
    route(AtestInsget1, AtestInsget_Insget1);
    Pin AtestInsget_Insget2 = pin_l(PinIn2);
    route(AtestInsget2, AtestInsget_Insget2);
    Pin AtestInsget_Wrmem = pin_l(PinIn3);
    route(AtestWrmem, AtestInsget_Wrmem);
    Pin AtestInsget_Irop3 = pin_l(PinIn4);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestInsget_Irop3;
    lut(I1|I2|I3&(~I4), "AtestInsget");
    Pin AtestInsget = pin_l(PinOut);
    route(AtestInsget, IrmaInsget_Atest);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #buffer atest-SelFunc1 = IR.OP.4
    //   no logic, just buffer IR bits to reduce fan-out of IR.OP outputs
    //     gives faster instruction decode, slower function select irrelevant
    Pin AtestSelFunc1_Irop4 = pin_l(PinIn1);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = AtestSelFunc1_Irop4;
    lut(I1, "AtestSelFunc1");
    Pin AtestSelFunc1 = pin_l(PinOut);
    routem(AtestSelFunc1, AtestComp_SelFunc1);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer atest-SelFunc2 = IR.OP.3
    Pin AtestSelFunc2_Irop3 = pin_l(PinIn1);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = AtestSelFunc2_Irop3;
    lut(I1, "AtestSelFunc2");
    Pin AtestSelFunc2 = pin_l(PinOut);
    routem(AtestSelFunc2, AtestComp_SelFunc2);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #carrygenerator atest-carry = ~IR.OP.4
    //   CA* subtract: I1-I2, no borrow,               carry-in 1
    //   JUMP/SKIP:    I1+0, no carry,                 carry-in 0
    //   AO*:          I1+1, add 1 from carry=1        carry-in 1
    //   SO*:          I1+(-1), no carry,              carry-in 0
    //   so carry-in is NOT(JUMP OR SKIP OR SO*) is NOT(IR.OP.4)
    Pin AtestCin_Irop4 = pin_l(PinIn1);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = AtestCin_Irop4;
    lut(~I1, "AtestCin");
    Pin AtestCin = pin_l(PinOut);
    route(AtestCin, AtestComp_Cin[DataLSB]);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control ClkE-atestAR = atestjump|atestskip
    //   hold test value, if we will be later storing, to Acc or to Mem
    Pin AtestClkE_Jump = pin_l(PinIn1);
    route(AtestJump, AtestClkE_Jump);
    Pin AtestClkE_Skip = pin_l(PinIn2);
    route(AtestSkip, AtestClkE_Skip);
    if (DebugDisp == 1) {
      // always set AR, so that the test value is visible
      lut(0xFFFF, "AtestClkE, forced to allways enabled by debug"); }
    else {
      lut(I1|I2, "AtestClkE"); }
    Pin AtestClkE = pin_l(PinOut);
    routem(AtestClkE, AtestReg_ClkE);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #control subpart atestMAM-Mux=MA = atestcam|atestskip|atestwrmem
    Pin AtestMamEnMaddr_Cam = pin_l(PinIn1);
    route(AtestCam, AtestMamEnMaddr_Cam);
    Pin AtestMamEnMaddr_Skip = pin_l(PinIn2);
    route(AtestSkip, AtestMamEnMaddr_Skip);
    Pin AtestMamEnMaddr_Wrmem = pin_l(PinIn3);
    route(AtestWrmem, AtestMamEnMaddr_Wrmem);
    lut(I1|I2|I3, "AtestMamEnMaddr");
    Pin AtestMamEnMaddr = pin_l(PinOut);
    route(AtestMamEnMaddr, MamEnMaddr_Atest);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart atestMAM-Mux=IR.AC =
    // #  atestcai|atestjump|atestcam2|atestwracc
    Pin AtestMamEnIrac_Cai = pin_l(PinIn1);
    route(AtestCai, AtestMamEnIrac_Cai);
    Pin AtestMamEnIrac_Jump = pin_l(PinIn2);
    route(AtestJump, AtestMamEnIrac_Jump);
    Pin AtestMamEnIrac_Seccam = pin_l(PinIn3);
    route(AtestSeccam, AtestMamEnIrac_Seccam);
    Pin AtestMamEnIrac_Wracc = pin_l(PinIn4);
    route(AtestWracc, AtestMamEnIrac_Wracc);
    lut(I1|I2|I3|I4, "AtestMamEnIrac");
    Pin AtestMamEnIrac = pin_l(PinOut);
    route(AtestMamEnIrac, MamEnIrac_Atest);
    nextlut();
    // #control subpart atestClkE-AR = atestcam
    //   only 1 input, no LUT, route 1 input direct to target
    route(AtestCam, ArithClkE_Atest);
    // commented out nextlut(), here despite no effect, for visual consistency
    //nextlut();
    // #control subpart atestImm-Mux=E = atestcai
    route(AtestCai, ImmedSelMaddr_Atest);
    //nextlut();
    // #control subpart atestClkE-Mem = atestwracc|atestwrmem
    Pin AtestMdwmMemClkE_Wracc = pin_l(PinIn1);
    route(AtestWracc, AtestMdwmMemClkE_Wracc);
    Pin AtestMdwmMemClkE_Wrmem = pin_l(PinIn2);
    route(AtestWrmem, AtestMdwmMemClkE_Wrmem);
    lut(I1|I2, "AtestMdwmMemClkE");
    Pin AtestMdwmMemClkE = pin_l(PinOut);
    route(AtestMdwmMemClkE, MdwmMemClkE_Atest);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('A', "tag Arithmetic Test Instruction, Compute");

    nextsli();


    // second step, wide-NOR to detect zero/non-zero, positive/negative is bit0

    System.out.print("Arithmetic Test 2, Zero: data " + pos());

    Pin AtestZero_Comp[] = new Pin[DataBits],
        AtestZero[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // OR of data bits 0..35 = 0 -> wide-NOR of data bits 0..35
      //   = not wide-OR = wide-AND of not OR = wide-AND of NOR
      //   ripple time of 36-LUT wide-NOR is hidden behind arithmetic above
      AtestZero_Comp[Bit] = pin_l(PinIn1);
      route(AtestComp[Bit], AtestZero_Comp[Bit]);
      // NOR of only 1 bit per LUT, gives a NOT
      lut(~I1, "AtestZero"+Bit);
      if (Bit == DataLSB) {
        // wide-AND of NORs/NOTs, BX = 1, LUTs force 0
        slice(CarryBegin, CarryBegin_FromInBx); }
      lcell(CarryEnable, CarryEnable_Modify);
      slice(CarryValue, CarryValue_Zero);
      if (Bit == DataMSB) {
        // grab wide-NOR carry output for zero test result
        slice(OutBFrom, OutBFrom_Carry);
        // AtestZero as array because of Java compiler bug
        //   aborts with "may not be initialised" error
        //     instead of just warning, and no way to suppress it
        AtestZero[Bit] = pin_l(PinOutB); }
      if (DebugDisp == 1) {
        // show what data was fed into the wide-AND
        slice(ClockFrom, SysClock); }
      nextlut(); }

    System.out.print(", control " + pos());

    // #control subpart atestPC-Mux=PC+1 = atestcai|atestskip|atestcam2
    Pin AtestProgSelIncr_Cai = pin_l(PinIn1);
    route(AtestCai, AtestProgSelIncr_Cai);
    Pin AtestProgSelIncr_Skip = pin_l(PinIn2);
    route(AtestSkip, AtestProgSelIncr_Skip);
    Pin AtestProgSelIncr_Seccam = pin_l(PinIn3);
    route(AtestSeccam, AtestProgSelIncr_Seccam);
    lut(I1|I2|I3, "AtestProgSelIncr");
    Pin AtestProgSelIncr = pin_l(PinOut);
    route(AtestProgSelIncr, ProgSelIncr_Atest);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #test atestcond = atestneg&IR.OP.8|atestzero&IR.OP.7
    //   select which conditions are allowed to trigger jump/skip
    Pin AtestCond_Neg = pin_l(PinIn1);
    route(AtestComp_Neg[DataMSB], AtestCond_Neg);
    Pin AtestCond_Irop8 = pin_l(PinIn2);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = AtestCond_Irop8;
    Pin AtestCond_Zero = pin_l(PinIn3);
    route(AtestZero[DataMSB], AtestCond_Zero);
    Pin AtestCond_Irop7 = pin_l(PinIn4);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = AtestCond_Irop7;
    lut(I1&I2|I3&I4, "AtestCond");
    Pin AtestCond = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart atestClkE-PC =
    // #  (atestPC-Mux=PC+1|atestjump)&(atestcond^IR.OP.6)
    Pin AtestProgClkE_Incr = pin_l(PinIn1);
    route(AtestProgSelIncr, AtestProgClkE_Incr);
    Pin AtestProgClkE_Jump = pin_l(PinIn2);
    route(AtestJump, AtestProgClkE_Jump);
    Pin AtestProgClkE_Cond = pin_l(PinIn3);
    route(AtestCond, AtestProgClkE_Cond);
    Pin AtestProgClkE_Irop6 = pin_l(PinIn4);
    IrmaInstrBus[6][IrmaInstrPos[6]++] = AtestProgClkE_Irop6;
    lut((I1|I2)&(I3^I4), "AtestProgClkE");
    Pin AtestProgClkE = pin_l(PinOut);
    route(AtestProgClkE, ProgClkE_Atest);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('2', "tag Arithmetic Test Instruction 2, Zero");

    nextsli();


// ------ 100ffffmm bitwise boolean logic instruction unit section

    // implement the 100ffffmm bitwise boolean logic instructions
    //   16 boolean logic functions with 4 operand selection modes

    // ffff function table, from PDP-10 Reference Manual, page 2-38
    // C(AC)     = MdrmData           0 1 0 1
    // C(E)/0,,E = ImmedData          0 0 1 1
    // --------------------------------------
    // Mnemonic  Instruction  IR ffff 3 4 5 6
    //                 function value 8 4 2 1
    // SETZ[m]   Set Zero             0 0 0 0
    // AND[m]    And                  0 0 0 1
    // ANDCA[m]  And Compl Acc        0 0 1 0
    // SETM[m]   Set Memory           0 0 1 1 (m=I on KL+ in non-0 sect XMOVEI)
    // ANDCM[m]  And Compl Mem        0 1 0 0
    // SETA[m]   Set Accu             0 1 0 1
    // XOR[m]    Exclusive Or         0 1 1 0
    // IOR[m]    Inclusive Or         0 1 1 1
    // ANDCB[m]  And Compl Both       1 0 0 0
    // EQV[m]    Equivalent           1 0 0 1
    // SETCA[m]  Set Compl Acc        1 0 1 0
    // ORCA[m]   Or Compl Acc         1 0 1 1
    // SETCM[m]  Set Compl Mem        1 1 0 0
    // ORCM[m]   Or Compl Mem         1 1 0 1
    // ORCB[m]   Or Compl Both        1 1 1 0
    // SETO[m]   Set One              1 1 1 1

    // mm operand mode, from PDP-10 Reference Manual, page 2-32
    // Suffix  Mode  IR mm 7 8  Source1  Source2  Destination
    // -       Basic       0 0  C(E)     C(AC)    AC
    // I       Immediate   0 1  0,,E     C(AC)    AC
    // M       Memory      1 0  C(E)     C(AC)    E
    // B       Both        1 1  C(E)     C(AC)    AC and E

    System.out.print("Boolean Logic: data " + pos());

    // data path 4:1-Mux of IR.3..6 selected by high C(E)/0,,E  and low C(AC)
    //   uses 2:1-Mux (F5-Mux) of 2 2:1-Muxes (LUTs)
    //   AR register for writeback in function generator units FFs

    Pin LogicFunc_Func8[] = new Pin[DataBits],
        LogicFunc_Func4[] = new Pin[DataBits],
        LogicFunc_Func2[] = new Pin[DataBits],
        LogicFunc_Func1[] = new Pin[DataBits],
        LogicFunc_Sel2Imm[] = new Pin[DataBits],
        LogicFunc_Sel1MemF[] = new Pin[DataBits],
        LogicFunc_Sel1MemG[] = new Pin[DataBits];

    Pin LogicReg_ClkE[] = new Pin[DataBits], LogicReg[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // use F5-Mux, needs fitting F/G LUT pair, so align to F LUT
      alignlutf();
      // IR.OP bits 3..4 to be muxed
      LogicFunc_Func8[Bit] = pin_l(PinIn1);
      LogicFunc_Func4[Bit] = pin_l(PinIn2);
      // F3 (and G3) lower selection bit, from C(AC) = MdrmData
      LogicFunc_Sel1MemF[Bit] = pin_l(PinIn3);
      MdrmDataBus[Bit][MdrmDataPos[Bit]++] = LogicFunc_Sel1MemF[Bit];
      lut(I1&(~I3)|I2&I3, "LogicFunc"+Bit+".F");
      // BX upper selection bit, from C(E)/0,,E = ImmedData
      LogicFunc_Sel2Imm[Bit] = pin_l(PinInB);
      ImmedDataBus[Bit][ImmedDataPos[Bit]++] = LogicFunc_Sel2Imm[Bit];
      // invert BX, because F5-Mux is 0/1:G/F
      lcell(InBInvert, InBInvert_On);
      lcell(OutFrom, OutFrom_F56Mux);
      // logic units arithmetic register, to then write back
      slice(ClockFrom, SysClock);
      LogicReg_ClkE[Bit] = pin_s(PinCe);
      LogicReg[Bit] = pin_l(PinOutQ);
      route(LogicReg[Bit], MdwmData_Logic[Bit]);
      secondlut();
      // IR.OP bits 5..6 to be muxed
      LogicFunc_Func2[Bit] = pin_l(PinIn1);
      LogicFunc_Func1[Bit] = pin_l(PinIn2);
      // G3 (and F3) lower selection bit, from C(AC) = MdrmData
      LogicFunc_Sel1MemG[Bit] = pin_l(PinIn3);
      MdrmDataBus[Bit][MdrmDataPos[Bit]++] = LogicFunc_Sel1MemG[Bit];
      lut(I1&(~I3)|I2&I3, "LogicFunc"+Bit+".G");
      if (DebugDisp == 1) {
        // make YQ show blank to not distract, take it from !BY = !1 = 0
        lcell(InBInvert, InBInvert_On);
        lcell(OutFrom, OutFrom_F56Mux); }
      nextzigzagsli(Bit%2 == 1); }

    System.out.print(", control " + pos());

    // boolean logic unit extension of processor state logic diagram
    // @in state insexec (Instruction Execute), continued from Irma insexec
    // @  elseif IR.OP=100ffff01 logicimmed (Logic from Immediate)
    // @    MAM-Mux=IR.AC               # 1st operand C(AC)
    // @    Imm-Mux=E                   # 2nd operand 0,,E
    // @    ClkE-logicAR=1              # apply "ffff", store to logicAR
    // @    state=logicstoac            # store logicAR to C(AC)
    // @  elseif IR.OP=100ffffmm logicmem (Logic from Memory)
    // @    MAM-Mux=MA                  # 2nd operand C(MA)
    // @    ClkE-AR=1                   # store to AR
    // @    state=logicsecmem           # get second operand and operate
    // @in state logicsecmem (Logic Second from Memory)
    // @  MAM-Mux=IR.AC                 # 1st operand C(AC)
    // @  Imm-Mux=AR                    # 2nd operand from AR
    // @  ClkE-logicAR=1                # apply "ffff", store to logicAR
    // @  if IR.OP=100ffff10            # memory mode
    // @    state=logicstomem           # store logicAR to C(MA)
    // @  else
    // @    state=logicstoac            # store logicAR to C(AC)
    // @in state logicstoac (Logic Store to Accumulator)
    // @  MAM-Mux=IR.AC
    // @  MD-Mux=logic
    // @  ClkE-Mem=1
    // @  if IR.OP=100ffff11            # both mode
    // @    state=logicstomem           # store logicAR also to C(MA)
    // @  else
    // @    state=insget                # we are done
    // @in state logicstomem (Logic Store to Memory)
    // @  MAM-Mux=MA
    // @  MD-Mux=logic
    // @  ClkE-Mem=1
    // @  state=insget                  # we are done

    // boolean logic unit extension of instruction execution FSM
    // #decode logicinstr = insdecode&IR.OP.0&(~IR.OP.1)&(~IR.OP.2)
    //   decode if start of an 100ffffmm bitwise boolean logic instruction
    Pin LogicInstr_Insdecode = pin_l(PinIn1);
    IrmaInsdecodeBus[IrmaInsdecodePos++] = LogicInstr_Insdecode;
    Pin LogicInstr_Irop0 = pin_l(PinIn2);
    IrmaInstrBus[0][IrmaInstrPos[0]++] = LogicInstr_Irop0;
    Pin LogicInstr_Irop1 = pin_l(PinIn3);
    IrmaInstrBus[1][IrmaInstrPos[1]++] = LogicInstr_Irop1;
    Pin LogicInstr_Irop2 = pin_l(PinIn4);
    IrmaInstrBus[2][IrmaInstrPos[2]++] = LogicInstr_Irop2;
    lut(I1&I2&(~I3)&(~I4), "LogicInstr");
    Pin LogicInstr = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #decode logicimmed = logicinstr&(~IR.OP.7)&IR.OP.8
    //   decode if start of an from immediate mode logic instruction
    Pin LogicImmed_Instr = pin_l(PinIn1);
    route(LogicInstr, LogicImmed_Instr);
    Pin LogicImmed_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = LogicImmed_Irop7;
    Pin LogicImmed_Irop8 = pin_l(PinIn3);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = LogicImmed_Irop8;
    lut(I1&(~I2)&I3, "LogicImmed");
    Pin LogicImmed = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #decode logicmem = logicinstr&(~((~IR.OP.7)&IR.OP.8))
    //   decode if start of an from memory mode logic instruction
    // #state logicsecmem = logicmem
    //   if come from logicmem, always fetch second operand from Accumulator
    //     same logic function, no own LUT, just add FF and OutQ Pin
    Pin LogicMem_Instr = pin_l(PinIn1);
    route(LogicInstr, LogicMem_Instr);
    Pin LogicMem_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = LogicMem_Irop7;
    Pin LogicMem_Irop8 = pin_l(PinIn3);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = LogicMem_Irop8;
    lut(I1&(~((~I2)&I3)), "LogicMem and LogicSecmem");
    Pin LogicMem = pin_l(PinOut);
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin LogicSecmem = pin_l(PinOutQ);
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #state logicstoac = logicsecmem&(~(IR.OP.7&(~IR.OP.8)))|logicimmed
    //   if come from logicsecmem and not memory mode, or come from logicimmed
    Pin LogicStoac_Secmem = pin_l(PinIn1);
    route(LogicSecmem, LogicStoac_Secmem);
    Pin LogicStoac_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = LogicStoac_Irop7;
    Pin LogicStoac_Irop8 = pin_l(PinIn3);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = LogicStoac_Irop8;
    Pin LogicStoac_Immed = pin_l(PinIn4);
    route(LogicImmed, LogicStoac_Immed);
    lut(I1&(~(I2&(~I3)))|I4, "LogicStoac");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin LogicStoac = pin_l(PinOutQ);
    nextlut();
    // #state logicstomem =
    // #  logicsecmem&IR.OP.7&(~IR.OP.8)|logicstoac&IR.OP.7&IR.OP.8
    //   if come from logicsecmem and is memory mode
    //   or come from logicstoac and both mode
    Pin LogicStomem_Secmem = pin_l(PinIn1);
    route(LogicSecmem, LogicStomem_Secmem);
    Pin LogicStomem_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = LogicStomem_Irop7;
    Pin LogicStomem_Irop8 = pin_l(PinIn3);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = LogicStomem_Irop8;
    Pin LogicStomem_Stoac = pin_l(PinIn4);
    route(LogicStoac, LogicStomem_Stoac);
    lut(I1&I2&(~I3)|I4&I2&I3, "LogicStomem");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin LogicStomem = pin_l(PinOutQ);
    nextlut();
    // #state subpart logicinsget = logicstoac&(~(IR.OP.7&IR.OP.8))|logicstomem
    //   trigger next instr if come from logicstoac and not both mode
    //     or come from logicstomem
    Pin LogicInsget_Stoac = pin_l(PinIn1);
    route(LogicStoac, LogicInsget_Stoac);
    Pin LogicInsget_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = LogicInsget_Irop7;
    Pin LogicInsget_Irop8 = pin_l(PinIn3);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = LogicInsget_Irop8;
    Pin LogicInsget_Stomem = pin_l(PinIn4);
    route(LogicStomem, LogicInsget_Stomem);
    lut(I1&(~(I2&I3))|I4, "LogicInsget");
    Pin LogicInsget = pin_l(PinOut);
    route(LogicInsget, IrmaInsget_Logic);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #buffer logic-Func8 = IR.OP.3
    //   no logic, just buffer IR bits to reduce fan-out of IR.OP outputs
    //     gives faster instruction decode, slower function select irrelevant
    Pin LogicFunc8_Irop3 = pin_l(PinIn1);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = LogicFunc8_Irop3;
    lut(I1, "LogicFunc8");
    Pin LogicFunc8 = pin_l(PinOut);
    routem(LogicFunc8, LogicFunc_Func8);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer logic-Func4 = IR.OP.4
    Pin LogicFunc4_Irop4 = pin_l(PinIn1);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = LogicFunc4_Irop4;
    lut(I1, "LogicFunc4");
    Pin LogicFunc4 = pin_l(PinOut);
    routem(LogicFunc4, LogicFunc_Func4);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer logic-Func2 = IR.OP.5
    Pin LogicFunc2_Irop5 = pin_l(PinIn1);
    IrmaInstrBus[5][IrmaInstrPos[5]++] = LogicFunc2_Irop5;
    lut(I1, "LogicFunc2");
    Pin LogicFunc2 = pin_l(PinOut);
    routem(LogicFunc2, LogicFunc_Func2);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer logic-Func1 = IR.OP.6
    Pin LogicFunc1_Irop6 = pin_l(PinIn1);
    IrmaInstrBus[6][IrmaInstrPos[6]++] = LogicFunc1_Irop6;
    lut(I1, "LogicFunc1");
    Pin LogicFunc1 = pin_l(PinOut);
    routem(LogicFunc1, LogicFunc_Func1);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control ClkE-logicAR = logicsecmem|logicimmed
    Pin LogicClkE_Secmem = pin_l(PinIn1);
    route(LogicSecmem, LogicClkE_Secmem);
    Pin LogicClkE_Immed = pin_l(PinIn2);
    route(LogicImmed, LogicClkE_Immed);
    if (DebugDisp == 1) {
      // always set AR, so that the result is visible
      lut(0xFFFF, "LogicClkE, forced to allways enabled by debug"); }
    else {
      lut(I1|I2, "LogicClkE"); }
    Pin LogicClkE = pin_l(PinOut);
    routem(LogicClkE, LogicReg_ClkE);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #control subpart logicMAM-Mux=MA = logicmem|logicstomem
    Pin LogicMamEnMaddr_Mem = pin_l(PinIn1);
    route(LogicMem, LogicMamEnMaddr_Mem);
    Pin LogicMamEnMaddr_Stomem = pin_l(PinIn2);
    route(LogicStomem, LogicMamEnMaddr_Stomem);
    lut(I1|I2, "LogicMamEnMaddr");
    Pin LogicMamEnMaddr = pin_l(PinOut);
    route(LogicMamEnMaddr, MamEnMaddr_Logic);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart logicMAM-Mux=IR.AC = logicsecmem|logicimmed|logicstoac
    Pin LogicMamEnIrac_Secmem = pin_l(PinIn1);
    route(LogicSecmem, LogicMamEnIrac_Secmem);
    Pin LogicMamEnIrac_Immed = pin_l(PinIn2);
    route(LogicImmed, LogicMamEnIrac_Immed);
    Pin LogicMamEnIrac_Stoac = pin_l(PinIn3);
    route(LogicStoac, LogicMamEnIrac_Stoac);
    lut(I1|I2|I3, "LogicMamEnIrac");
    Pin LogicMamEnIrac = pin_l(PinOut);
    route(LogicMamEnIrac, MamEnIrac_Logic);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart logicClkE-AR = logicmem
    //   only 1 input, no LUT, route 1 input direct to target
    route(LogicMem, ArithClkE_Logic);
    // commented out nextlut(), here despite no effect, for visual consistency
    //nextlut();
    // #control subpart logicImm-Mux=E = logicimmed
    route(LogicImmed, ImmedSelMaddr_Logic);
    //nextlut();
    // #control subpart logicClkE-Mem = logicstoac|logicstomem
    Pin LogicMdwmMemClkE_Stoac = pin_l(PinIn1);
    route(LogicStoac, LogicMdwmMemClkE_Stoac);
    Pin LogicMdwmMemClkE_Stomem = pin_l(PinIn2);
    route(LogicStomem, LogicMdwmMemClkE_Stomem);
    lut(I1|I2, "LogicMdwmMemClkE");
    Pin LogicMdwmMemClkE = pin_l(PinOut);
    route(LogicMdwmMemClkE, MdwmMemClkE_Logic);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('L', "tag Boolean Logic Instruction");

    nextsli(); nextsli();


// ------ 101tootmm half word instruction unit section

    // implement the 101tootmm half word instructions
    //   4 halfword transfers with 4 modifications and 4 operand select modes

    // tt transfer type, from PDP-10 Reference Manual, page 2-55
    // Mnemonic   Instruction      IR tt 3 6
    // HLL[o][m]  Half Word Left Left    0 0 (m=I on KL+ in non-0 sect XHLLI)
    // HRL[o][m]  Half Word Right Left   0 1
    // HRR[o][m]  Half Word Right Right  1 0
    // HLR[o][m]  Half Word Left Right   1 1
    // IR.3 is select main destination half, IR.6 is swap L and R of input

    // oo modification, from PDP-10 Reference Manual, page 2-55
    // Suffix  Modification  IR oo 4 5  Other (not main) Destination Half
    // -       Do Nothing          0 0  leave unchanged
    // Z       Zeros               0 1  all zeros
    // O       Ones                1 0  all ones
    // E       Extend              1 1  top bit of main half

    // mm operand mode, from PDP-10 Reference Manual, page 2-55
    // Suffix  Mode  IR mm 7 8  Source  Source2+Destination
    // -       Basic       0 0  C(E)    C(AC)
    // I       Immediate   0 1  0,,E    C(AC)
    // M       Memory      1 0  C(AC)   C(E)
    // S       Self        1 1  C(E)    C(E), if AC#0 C(AC)=C(E)


    // first step, source half word swapper

    System.out.print("Half Word, Swapper: data " + pos());

    // data path 2:1-Mux by IR.OP.6 source half word swapper from Immed
    //   4:1-Mux by IR.OP.4+5 dest/zero/one/swap-top modifier for non-main half
    //   2:1-Mux by IR.OP.3 select destination halves to swapped or modified
    //   AR register for writeback in destination selector units FFs

    Pin HwordSwap_Imm[] = new Pin[DataBits],
        HwordSwap_ImmSwapped[] = new Pin[DataBits],
        HwordSwap_SelSwapped[] = new Pin[DataBits],
        HwordSwap[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // C(E) or C(AC) or 0,,E from Immed as source1 for HLL or HRR
      HwordSwap_Imm[Bit] = pin_l(PinIn1);
      ImmedDataBus[Bit][ImmedDataPos[Bit]++] = HwordSwap_Imm[Bit];
      // swapped C(E) or C(AC) or 0,,E from Immed as source1 for HLR or HRL
      HwordSwap_ImmSwapped[Bit] = pin_l(PinIn2);
      if (Bit >= 18) {
        // right: left half for HLR swap
        ImmedDataBus[Bit-18][ImmedDataPos[Bit-18]++] =
          HwordSwap_ImmSwapped[Bit]; }
      else {
        // left: right half for HRL swap
        ImmedDataBus[Bit+18][ImmedDataPos[Bit+18]++] =
          HwordSwap_ImmSwapped[Bit]; }
      // mux sel to swap the half words
      HwordSwap_SelSwapped[Bit] = pin_l(PinIn3);
      lut(I1&(~I3)|I2&I3, "HwordSwap"+Bit);
      HwordSwap[Bit] = pin_l(PinOut);
      if (DebugDisp == 1) {
        // show what data the swapper selected
        slice(ClockFrom, SysClock); }
      nextlut(); }

    System.out.print(", control " + pos());

    // half word unit extension of processor state logic diagram
    // @in state insexec (Instruction Execute), continued from Irma insexec
    // @  elseif IR.OP=101toot01 hwordimmed (Hword from Immediate)
    // @    Imm-Mux=E                   # 1st operand 0,,E
    // @    MAM-Mux=IR.AC               # 2nd operand C(AC)
    // @    ClkE-hwordAR=1              # apply "toot", store to hwordAR
    // @    state=hwordstoac            # store hwordAR to C(AC)
    // @  elseif IR.OP=101toot10 hwordac (Hword from Accumulator)
    // @    MAM-Mux=IR.AC               # 1st operand C(AC)
    // @    ClkE-AR=1                   # store to AR
    // @    state=hwordsecmem           # get second operand and operate
    // @  elseif IR.OP=101tootmm hwordmem (Hword from Memory)
    // @    MAM-Mux=MA                  # 1st operand C(MA)
    // @    ClkE-AR=1                   # store to AR
    // @    if IR.OP=101toot11          # self mode, second also memory
    // @      state=hwordsecmem         # get second operand and operate
    // @    else
    // @      state=hwordsecac          # get second operand and operate
    // @in state hwordsecmem (Hword Second from Memory)
    // @  Imm-Mux=AR                    # 1st operand from AR
    // @  MAM-Mux=MA                    # 2nd operand C(MA)
    // @  ClkE-hwordAR=1                # apply "toot", store to hwordAR
    // @  state=hwordstomem             # store hwordAR to C(MA)
    // @in state hwordsecac (Hword Second from Accumulator)
    // @  Imm-Mux=AR                    # 1st operand from AR
    // @  MAM-Mux=IR.AC                 # 2nd operand C(AC)
    // @  ClkE-hwordAR=1                # apply "toot", store to hwordAR
    // @  state=hwordstoac              # store hwordAR only to C(AC)
    // @in state hwordstomem (Hword Store to Memory)
    // @  MAM-Mux=MA
    // @  MD-Mux=hword
    // @  ClkE-Mem=1
    // @  if IR.OP=101toot11 and IR.AC  # self mode, second also C(AC)
    // @    state=hwordstoac            # store hwordAR also to C(AC)
    // @  else
    // @    state=insget                # we are done
    // @in state hwordstoac (Hword Store to Accumulator)
    // @  MAM-Mux=IR.AC
    // @  MD-Mux=hword
    // @  ClkE-Mem=1
    // @  state=insget                  # we are done

    // half word unit extension of instruction execution FSM
    // #decode hwordinstr = insdecode&IR.OP.0&(~IR.OP.1)&IR.OP.2
    //   decode if start of a 101tootmm half word instruction
    Pin HwordInstr_Insdecode = pin_l(PinIn1);
    IrmaInsdecodeBus[IrmaInsdecodePos++] = HwordInstr_Insdecode;
    Pin HwordInstr_Irop0 = pin_l(PinIn2);
    IrmaInstrBus[0][IrmaInstrPos[0]++] = HwordInstr_Irop0;
    Pin HwordInstr_Irop1 = pin_l(PinIn3);
    IrmaInstrBus[1][IrmaInstrPos[1]++] = HwordInstr_Irop1;
    Pin HwordInstr_Irop2 = pin_l(PinIn4);
    IrmaInstrBus[2][IrmaInstrPos[2]++] = HwordInstr_Irop2;
    lut(I1&I2&(~I3)&I4, "HwordInstr");
    Pin HwordInstr = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #decode hwordimmed = hwordinstr&(~IR.OP.7)&IR.OP.8
    //   decode if start of an "from immediate" halfword instruction
    Pin HwordImmed_Instr = pin_l(PinIn1);
    route(HwordInstr, HwordImmed_Instr);
    Pin HwordImmed_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = HwordImmed_Irop7;
    Pin HwordImmed_Irop8 = pin_l(PinIn3);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = HwordImmed_Irop8;
    lut(I1&(~I2)&I3, "HwordImmed");
    Pin HwordImmed = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #decode hwordac = hwordinstr&IR.OP.7&(~IR.OP.8)
    //   decode if start of an "from accumulator" halfword instruction
    Pin HwordAc_Instr = pin_l(PinIn1);
    route(HwordInstr, HwordAc_Instr);
    Pin HwordAc_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = HwordAc_Irop7;
    Pin HwordAc_Irop8 = pin_l(PinIn3);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = HwordAc_Irop8;
    lut(I1&I2&(~I3), "HwordAc");
    Pin HwordAc = pin_l(PinOut);
    nextlut();
    // #decode hwordmem = hwordinstr&((~IR.OP.7)&(~IR.OP.8)|IR.OP.7&IR.OP.8)
    //   decode if start of an "from memory" halfword instruction
    Pin HwordMem_Instr = pin_l(PinIn1);
    route(HwordInstr, HwordMem_Instr);
    Pin HwordMem_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = HwordMem_Irop7;
    Pin HwordMem_Irop8 = pin_l(PinIn3);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = HwordMem_Irop8;
    lut(I1&((~I2)&(~I3)|I2&I3), "HwordMem");
    Pin HwordMem = pin_l(PinOut);
    nextlut();
    // #state hwordsecmem = hwordinstr&IR.OP.7
    //   if second from and to memory
    Pin HwordSecmem_Instr = pin_l(PinIn1);
    route(HwordInstr, HwordSecmem_Instr);
    Pin HwordSecmem_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = HwordSecmem_Irop7;
    lut(I1&I2, "HwordSecmem");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin HwordSecmem = pin_l(PinOutQ);
    nextlut();
    // #state hwordsecac = hwordinstr&(~IR.OP.7)&(~IR.OP.8)
    //   if second from and to accumulator
    Pin HwordSecac_Instr = pin_l(PinIn1);
    route(HwordInstr, HwordSecac_Instr);
    Pin HwordSecac_Irop7 = pin_l(PinIn2);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = HwordSecac_Irop7;
    Pin HwordSecac_Irop8 = pin_l(PinIn3);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = HwordSecac_Irop8;
    lut(I1&(~I2)&(~I3), "HwordSecac");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin HwordSecac = pin_l(PinOutQ);
    nextlut();
    // #state hwordstomem = hwordsecmem
    //   if to memory
    Pin HwordStomem_Secmem = pin_l(PinIn1);
    route(HwordSecmem, HwordStomem_Secmem);
    lut(I1, "HwordStomem");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin HwordStomem = pin_l(PinOutQ);
    nextlut();
    // #decode hworddoself = IR.OP.7&IR.OP.8&insacc
    Pin HwordDoself_Irop7 = pin_l(PinIn1);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = HwordDoself_Irop7;
    Pin HwordDoself_Irop8 = pin_l(PinIn2);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = HwordDoself_Irop8;
    Pin HwordDoself_Insacc = pin_l(PinIn3);
    IrmaInsaccBus[IrmaInsaccPos++] = HwordDoself_Insacc;
    lut(I1&I2&I3, "HwordDoself");
    Pin HwordDoself = pin_l(PinOut);
    nextlut();
    // #state hwordstoac = hwordimmed|hwordsecac|hwordstomem&hworddoself
    //   if to accumulator
    Pin HwordStoac_Immed = pin_l(PinIn1);
    route(HwordImmed, HwordStoac_Immed);
    Pin HwordStoac_Secac = pin_l(PinIn2);
    route(HwordSecac, HwordStoac_Secac);
    Pin HwordStoac_Stomem = pin_l(PinIn3);
    route(HwordStomem, HwordStoac_Stomem);
    Pin HwordStoac_Doself = pin_l(PinIn4);
    route(HwordDoself, HwordStoac_Doself);
    lut(I1|I2|I3&I4, "HwordStoac");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin HwordStoac = pin_l(PinOutQ);
    nextlut();
    // #state subpart hwordinsget = hwordstoac|hwordstomem&(~hworddoself)
    //   trigger next instr if come from hwordstoac
    //     or come from hwordstomem and non-self mode
    Pin HwordInsget_Stoac = pin_l(PinIn1);
    route(HwordStoac, HwordInsget_Stoac);
    Pin HwordInsget_Stomem = pin_l(PinIn2);
    route(HwordStomem, HwordInsget_Stomem);
    Pin HwordInsget_Doself = pin_l(PinIn3);
    route(HwordDoself, HwordInsget_Doself);
    lut(I1|I2&(~I3), "HwordInsget");
    Pin HwordInsget = pin_l(PinOut);
    route(HwordInsget, IrmaInsget_Hword);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #buffer hword-SelSwapped = IR.OP.6
    //   no logic, just buffer IR bits to reduce fan-out of IR.OP outputs
    //     gives faster instruction decode, slower function select irrelevant
    Pin HwordSelSwapped_Irop6 = pin_l(PinIn1);
    IrmaInstrBus[6][IrmaInstrPos[6]++] = HwordSelSwapped_Irop6;
    lut(I1, "HwordSelSwapped");
    Pin HwordSelSwapped = pin_l(PinOut);
    routem(HwordSelSwapped, HwordSwap_SelSwapped);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #control subpart hwordMAM-Mux=MA = hwordmem|hwordsecmem|hwordstomem
    Pin HwordMamEnMaddr_Mem = pin_l(PinIn1);
    route(HwordMem, HwordMamEnMaddr_Mem);
    Pin HwordMamEnMaddr_Secmem = pin_l(PinIn2);
    route(HwordSecmem, HwordMamEnMaddr_Secmem);
    Pin HwordMamEnMaddr_Stomem = pin_l(PinIn3);
    route(HwordStomem, HwordMamEnMaddr_Stomem);
    lut(I1|I2|I3, "HwordMamEnMaddr");
    Pin HwordMamEnMaddr = pin_l(PinOut);
    route(HwordMamEnMaddr, MamEnMaddr_Hword);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart hwordMAM-Mux=IR.AC =
    // #  hwordimmed|hwordac|hwordsecac|hwordstoac
    Pin HwordMamEnIrac_Immed = pin_l(PinIn1);
    route(HwordImmed, HwordMamEnIrac_Immed);
    Pin HwordMamEnIrac_Ac = pin_l(PinIn2);
    route(HwordAc, HwordMamEnIrac_Ac);
    Pin HwordMamEnIrac_Secac = pin_l(PinIn3);
    route(HwordSecac, HwordMamEnIrac_Secac);
    Pin HwordMamEnIrac_Stoac = pin_l(PinIn4);
    route(HwordStoac, HwordMamEnIrac_Stoac);
    lut(I1|I2|I3|I4, "HwordMamEnIrac");
    Pin HwordMamEnIrac = pin_l(PinOut);
    route(HwordMamEnIrac, MamEnIrac_Hword);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart hwordClkE-AR = hwordac|hwordmem
    Pin HwordArithAlkE_Ac = pin_l(PinIn1);
    route(HwordAc, HwordArithAlkE_Ac);
    Pin HwordArithAlkE_Mem = pin_l(PinIn2);
    route(HwordMem, HwordArithAlkE_Mem);
    lut(I1|I2, "HwordArithAlkE");
    Pin HwordArithAlkE = pin_l(PinOut);
    route(HwordArithAlkE, ArithClkE_Hword);
    nextlut();
    // #control subpart hwordImm-Mux=E = hwordimmed
    //   only 1 input, no LUT, pass final control as direct input to route()
    route(HwordImmed, ImmedSelMaddr_Hword);
    // commented out nextlut(), here despite no effect, for visual consistency
    //nextlut();
    // #control subpart hwordMdwmClkE-Mem = hwordstoac|hwordstomem
    Pin HwordMdwmMemClkE_Stoac = pin_l(PinIn1);
    route(HwordStoac, HwordMdwmMemClkE_Stoac);
    Pin HwordMdwmMemClkE_Stomem = pin_l(PinIn2);
    route(HwordStomem, HwordMdwmMemClkE_Stomem);
    lut(I1|I2, "HwordMdwmMemClkE");
    Pin HwordMdwmMemClkE = pin_l(PinOut);
    route(HwordMdwmMemClkE, MdwmMemClkE_Hword);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('H', "tag Half Word Instruction, Swapper");

    nextsli();


    // second step, dest/zero/one/swap-top modifier for non-main half

    System.out.print("Half Word 2, Modifier: data " + pos());

    Pin HwordMod_Mem[] = new Pin[DataBits],
        HwordMod_SwapTopR[] = new Pin[DataBits/2],
        HwordMod_SwapTopL[] = new Pin[DataBits/2],
        HwordMod_SelMod1[] = new Pin[DataBits],
        HwordMod_SelMod2[] = new Pin[DataBits],
        HwordMod[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // C(AC) or C(E) direct for set/stay destination
      HwordMod_Mem[Bit] = pin_l(PinIn1);
      MdrmDataBus[Bit][MdrmDataPos[Bit]++] = HwordMod_Mem[Bit];
      // top bit from swapper for extend, 2 arrays to routem() bit 18 and bit 0
      if (Bit >= 18) {
        // for right shift Bit to fill short array
        HwordMod_SwapTopR[Bit-18] = pin_l(PinIn2);
        if (Bit == 18) {
          // top bit of left half to all of right half
          routem(HwordSwap[DataMSB], HwordMod_SwapTopR); } }
      else {
        HwordMod_SwapTopL[Bit] = pin_l(PinIn2);
        if (Bit == DataMSB) {
          // top bit of right half to all of left half (the actual sign extend)
          routem(HwordSwap[18], HwordMod_SwapTopL); } }
      HwordMod_SelMod1[Bit] = pin_l(PinIn3);
      HwordMod_SelMod2[Bit] = pin_l(PinIn4);
      // modification: Nothing I1, Zeroes 0, Ones 1, Extend I2
      lut(I1     &(~I3)&(~I4)|
          0x0000 &  I3 &(~I4)|
          0xFFFF &(~I3)&  I4 |
          I2     &  I3 &  I4 , "HwordMod"+Bit);
      HwordMod[Bit] = pin_l(PinOut);
      if (DebugDisp == 1) {
        // show what data the modifier selected
        slice(ClockFrom, SysClock); }
      nextlut(); }

    System.out.print(", control " + pos());

    // #buffer hword-SelMod1 = IR.OP.5
    //   no logic, just buffer IR bits to reduce fan-out of IR.OP outputs
    //     gives faster instruction decode, slower function select irrelevant
    Pin HwordSelMod1_Irop5 = pin_l(PinIn1);
    IrmaInstrBus[5][IrmaInstrPos[5]++] = HwordSelMod1_Irop5;
    lut(I1, "HwordSelMod1");
    Pin HwordSelMod1 = pin_l(PinOut);
    routem(HwordSelMod1, HwordMod_SelMod1);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer hword-SelMod2 = IR.OP.4
    Pin HwordSelMod2_Irop4 = pin_l(PinIn1);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = HwordSelMod2_Irop4;
    lut(I1, "HwordSelMod2");
    Pin HwordSelMod2 = pin_l(PinOut);
    routem(HwordSelMod2, HwordMod_SelMod2);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('2', "tag Half Word Instruction 2, Modifier");

    nextsli();


    // third step, select destination halves to swapped or modified

    System.out.print("Half Word 3, Destination: data " + pos());

    Pin HwordDest_Swap[] = new Pin[DataBits],
        HwordDest_Mod[] = new Pin[DataBits],
        HwordDest_SelDest[] = new Pin[DataBits];

    Pin HwordReg_ClkE[] = new Pin[DataBits/2],
        HwordReg[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      HwordDest_Swap[Bit] = pin_l(PinIn1);
      route(HwordSwap[Bit], HwordDest_Swap[Bit]);
      HwordDest_Mod[Bit] = pin_l(PinIn2);
      route(HwordMod[Bit], HwordDest_Mod[Bit]);
      // select mux left swap and right mod or left mod and right swap
      HwordDest_SelDest[Bit] = pin_l(PinIn3);
      if (Bit >= 18) {
        // right: on HRR and HLR from swapper, else from modifier
        lut(I1&I3|I2&(~I3), "HwordDestRight"+Bit); }
      else {
        // left: on HLL and HRL from swapper, else from modifier
        lut(I1&(~I3)|I2&I3, "HwordDestLeft"+Bit); }
      // hword units arithmetic register, to then write back
      slice(ClockFrom, SysClock);
      if (Bit%2 == 1) {
        HwordReg_ClkE[Bit/2] = pin_s(PinCe); }
      HwordReg[Bit] = pin_l(PinOutQ);
      route(HwordReg[Bit], MdwmData_Hword[Bit]);
      nextlut(); }

    System.out.print(", control " + pos());

    // #buffer hword-SelDest = IR.OP.3
    //   no logic, just buffer IR bits to reduce fan-out of IR.OP outputs
    //     gives faster instruction decode, slower function select irrelevant
    Pin HwordSelDest_Irop3 = pin_l(PinIn1);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = HwordSelDest_Irop3;
    lut(I1, "HwordSelDest");
    Pin HwordSelDest = pin_l(PinOut);
    routem(HwordSelDest, HwordDest_SelDest);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control ClkE-hwordAR = hwordimmed|hwordsecmem|hwordsecac
    Pin HwordClkE_Immed = pin_l(PinIn1);
    route(HwordImmed, HwordClkE_Immed);
    Pin HwordClkE_Secmem = pin_l(PinIn2);
    route(HwordSecmem, HwordClkE_Secmem);
    Pin HwordClkE_Secac = pin_l(PinIn3);
    route(HwordSecac, HwordClkE_Secac);
    if (DebugDisp == 1) {
      // always set AR, so that the test value is visible
      lut(0xFFFF, "HwordClkE, forced to allways enabled by debug"); }
    else {
      lut(I1|I2|I3, "HwordClkE"); }
    Pin HwordClkE = pin_l(PinOut);
    routem(HwordClkE, HwordReg_ClkE);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('3', "tag Half Word Instruction 3, Destination");

    nextsli();


// ------ 110ooamma bit testing instruction unit section

    // implement the 110ooamma bit testing instructions
    //   4 operand tests with 4 modifications and 4 skip condition modes

    // aa test mask, from PDP-10 Reference Manual, page 2-47
    // Mnemonic  Instruction  IR aa 5 8  Arithmetic
    // TRo[m]    Test Right         0 0  C(AC) AND 0,,E
    // TLo[m]    Test Left          0 1  C(AC) AND E,,0
    // TDo[m]    Test Direct        1 0  C(AC) AND C(E)
    // TSo[m]    Test Swapped       1 1  C(AC) AND right(C(E))+left(C(E))
    // IR.5 is 0,,E vs C(E), IR.8 is swap L and R of input from 0,,E or C(E)

    // oo modification, from PDP-10 Reference Manual, page 2-47
    // Suffix  Modification  IR oo 3 4  Arithmetic
    // N       No                  0 0  no writeback
    // Z       Zeros               0 1  C(AC) = C(AC) ANDC xxx
    // C       Complement          1 0  C(AC) = C(AC) XOR xxx
    // O       Ones                1 1  C(AC) = C(AC) OR xxx

    // mm condition mode, from PDP-10 Reference Manual, page 2-48
    // Suffix  Mode  IR mm 6 7  skip on
    // -       Never       0 0  nothing
    // E       Equal       0 1  zero(OR of masked bits 0..35 = 0)
    // A       Always      1 0  inverse of -
    // N       Not Equal   1 1  inverse of E
    // IR.6 is invert test, IR.7 is test zero, this is similar to arith test


    // first step, test pattern half word swapper

    System.out.print("Bit Test, Swapper: data " + pos());

    // data path 2:1-Mux by IR.OP.8 test pattern half word swapper from Immed
    //   -/ANDC/XOR/OR modify unit with 2 function select lines
    //   AR register (for ANDC/XOR/OR writeback) in modify units FFs
    //   bitwise AND data with test pattern and wide-NOR for zero/non-zero

    Pin BtestSwap_Imm[] = new Pin[DataBits],
        BtestSwap_ImmSwapped[] = new Pin[DataBits],
        BtestSwap_SelSwapped[] = new Pin[DataBits],
        BtestSwap[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // C(E) or 0,,E from Immed as test pattern
      BtestSwap_Imm[Bit] = pin_l(PinIn1);
      ImmedDataBus[Bit][ImmedDataPos[Bit]++] = BtestSwap_Imm[Bit];
      // swapped C(E) or 0,,E from Immed as test pattern
      BtestSwap_ImmSwapped[Bit] = pin_l(PinIn2);
      if (Bit >= 18) {
        // right: left half for pattern swap
        ImmedDataBus[Bit-18][ImmedDataPos[Bit-18]++] =
          BtestSwap_ImmSwapped[Bit]; }
      else {
        // left: right half for pattern swap
        ImmedDataBus[Bit+18][ImmedDataPos[Bit+18]++] =
          BtestSwap_ImmSwapped[Bit]; }
      // mux sel to swap the half words
      BtestSwap_SelSwapped[Bit] = pin_l(PinIn3);
      lut(I1&(~I3)|I2&I3, "BtestSwap"+Bit);
      BtestSwap[Bit] = pin_l(PinOut);
      if (DebugDisp == 1) {
        // show what data the swapper selected
        slice(ClockFrom, SysClock); }
      nextlut(); }

    System.out.print(", control " + pos());

    // bit testing unit extension of processor state logic diagram
    // @in state insexec (Instruction Execute), continued from Irma insexec
    // @  elseif IR.OP=110oo0mma btestimmed (Btest from Immediate)
    // @    MAM-Mux=IR.AC               # 1st operand C(AC)
    // @    Imm-Mux=E                   # 2nd operand 0,,E
    // @    PC-Mux=PC+1                 # skip
    // @    if doit                     # apply "sub" and test
    // @      ClkE-PC=1
    // @    if IR.OP=11000amma
    // @      state=insget              # we are done
    // @    else
    // @      ClkE-btestAR=1            # apply "aao", store to btestAR
    // @      state=bteststoac
    // @  elseif IR.OP=110oo1mma btestmem (Btest from Memory)
    // @    MAM-Mux=MA                  # 2nd operand C(MA)
    // @    ClkE-AR=1                   # store to AR
    // @    state=btestsecmem           # get second operand and test
    // @in state btestsecmem (Btest Second from Memory)
    // @  MAM-Mux=IR.AC                 # 1st operand C(AC)
    // @  Imm-Mux=AR                    # 2nd operand from AR
    // @  PC-Mux=PC+1                   # skip
    // @  if doit                       # apply "sub" and test
    // @    ClkE-PC=1
    // @  if IR.OP=11000amma
    // @    state=insget                # we are done
    // @  else
    // @    ClkE-btestAR=1              # apply "aao", store to btestAR
    // @    state=bteststoac
    // @in state bteststoac (Btest Store to Accumulator)
    // @  MAM-Mux=IR.AC
    // @  MD-Mux=btest
    // @  ClkE-Mem=1
    // @  state=insget                  # we are done

    // bit test unit extension of instruction execution FSM
    // #decode btestinstr = insdecode&IR.OP.0&IR.OP.1&(~IR.OP.2)
    //   decode if start of an 110ooamma bit test instruction
    Pin BtestInstr_Insdecode = pin_l(PinIn1);
    IrmaInsdecodeBus[IrmaInsdecodePos++] = BtestInstr_Insdecode;
    Pin BtestInstr_Irop0 = pin_l(PinIn2);
    IrmaInstrBus[0][IrmaInstrPos[0]++] = BtestInstr_Irop0;
    Pin BtestInstr_Irop1 = pin_l(PinIn3);
    IrmaInstrBus[1][IrmaInstrPos[1]++] = BtestInstr_Irop1;
    Pin BtestInstr_Irop2 = pin_l(PinIn4);
    IrmaInstrBus[2][IrmaInstrPos[2]++] = BtestInstr_Irop2;
    lut(I1&I2&I3&(~I4), "BtestInstr");
    Pin BtestInstr = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #decode btestimmed = btestinstr&(~IR.OP.5)
    //   decode if start of an immediate mode bit test instruction
    //     IR.OP.5 = 0 selects 0,,E as test pattern (110oo0mma, TRxx/TLxx)
    //   only I1 and I2 are connected, so the LUT equation must not use I3
    Pin BtestImmed_Instr = pin_l(PinIn1);
    route(BtestInstr, BtestImmed_Instr);
    Pin BtestImmed_Irop5 = pin_l(PinIn2);
    IrmaInstrBus[5][IrmaInstrPos[5]++] = BtestImmed_Irop5;
    lut(I1&(~I2), "BtestImmed");
    Pin BtestImmed = pin_l(PinOut);
    if (DebugDisp == 1) {
      // clock the slice FF so the decode result is captured for display
      slice(ClockFrom, SysClock); }
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #decode btestmem = btestinstr&IR.OP.5
    //   decode if start of an memory mode bit test instruction
    //     IR.OP.5 = 1 selects C(E) as test pattern (110oo1mma, TDxx/TSxx)
    //   only I1 and I2 are connected, so the LUT equation must not use I3
    // #state btestsecmem = btestmem
    //   if come from btestmem, always fetch second operand
    //     same logic function, no own LUT, just add FF and OutQ Pin
    Pin BtestMem_Instr = pin_l(PinIn1);
    route(BtestInstr, BtestMem_Instr);
    Pin BtestMem_Irop5 = pin_l(PinIn2);
    IrmaInstrBus[5][IrmaInstrPos[5]++] = BtestMem_Irop5;
    lut(I1&I2, "BtestMem and BtestMem2");
    // combinational output is the decode, used in the current cycle
    Pin BtestMem = pin_l(PinOut);
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    // registered output is the state bit for the following cycle
    Pin BtestSecmem = pin_l(PinOutQ);
    nextlut();
    // not enough routing resources left to use G LUT
    alignlutf();
    // #state bteststoac = (btestimmed|btestsecmem)&(IR.OP.3|IR.OP.4)
    //   if come from btestimmed or btestsecmem, when any writeback mode
    Pin BtestStoac_Immed = pin_l(PinIn1);
    route(BtestImmed, BtestStoac_Immed);
    Pin BtestStoac_Secmem = pin_l(PinIn2);
    route(BtestSecmem, BtestStoac_Secmem);
    Pin BtestStoac_Irop3 = pin_l(PinIn3);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = BtestStoac_Irop3;
    Pin BtestStoac_Irop4 = pin_l(PinIn4);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = BtestStoac_Irop4;
    lut((I1|I2)&(I3|I4), "BtestStoac");
    slice(ClockFrom, SysClock);
    // on reset clear execution FSM bit to not start-up in this state
    lcell(FlipflopResetTo, FlipflopResetTo_GSR0InB1);
    slice(FlipflopSyncreset, FlipflopSyncreset_On);
    Pin BtestStoac = pin_l(PinOutQ);
    nextlut();
    // #state subpart btestinsget =
    // #  (btestimmed|btestsecmem)&(~(IR.OP.3|IR.OP.4))|bteststoac
    //   trigger next instr if come from btestimmed or btestsecmem,
    //     when no writeback or if writeback done
    Pin BtestInsget1_Immed = pin_l(PinIn1);
    route(BtestImmed, BtestInsget1_Immed);
    Pin BtestInsget1_Secmem = pin_l(PinIn2);
    route(BtestSecmem, BtestInsget1_Secmem);
    Pin BtestInsget1_Irop3 = pin_l(PinIn3);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = BtestInsget1_Irop3;
    Pin BtestInsget1_Irop4 = pin_l(PinIn4);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = BtestInsget1_Irop4;
    lut((I1|I2)&(~(I3|I4)), "BtestInsget1");
    Pin BtestInsget1 = pin_l(PinOut);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    Pin BtestInsget_Insget1 = pin_l(PinIn1);
    route(BtestInsget1, BtestInsget_Insget1);
    Pin BtestInsget_Stoac = pin_l(PinIn2);
    route(BtestStoac, BtestInsget_Stoac);
    lut(I1|I2, "BtestInsget");
    Pin BtestInsget = pin_l(PinOut);
    route(BtestInsget, IrmaInsget_Btest);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #buffer btest-SelSwapped = IR.OP.8
    //   no logic, just buffer IR bits to reduce fan-out of IR.OP outputs
    //     gives faster instruction decode, slower function select irrelevant
    Pin BtestSelSwapped_Irop8 = pin_l(PinIn1);
    IrmaInstrBus[8][IrmaInstrPos[8]++] = BtestSelSwapped_Irop8;
    lut(I1, "BtestSelSwapped");
    Pin BtestSelSwapped = pin_l(PinOut);
    routem(BtestSelSwapped, BtestSwap_SelSwapped);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    // #control subpart btestMAM-Mux=MA = btestmem
    //   only 1 input, no LUT, route 1 input direct to target
    route(BtestMem, MamEnMaddr_Btest);
    // commented out nextlut(), here despite no effect, for visual consistency
    //nextlut();
    // #control subpart btestMAM-Mux=IR.AC = btestimmed|btestsecmem|bteststoac
    Pin BtestMamEnIrac_Immed = pin_l(PinIn1);
    route(BtestImmed, BtestMamEnIrac_Immed);
    Pin BtestMamEnIrac_Secmem = pin_l(PinIn2);
    route(BtestSecmem, BtestMamEnIrac_Secmem);
    Pin BtestMamEnIrac_Stoac = pin_l(PinIn3);
    route(BtestStoac, BtestMamEnIrac_Stoac);
    lut(I1|I2|I3, "BtestMamEnIrac");
    Pin BtestMamEnIrac = pin_l(PinOut);
    route(BtestMamEnIrac, MamEnIrac_Btest);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart btestClkE-AR = btestmem
    //   only 1 input, no LUT, route 1 input direct to target
    route(BtestMem, ArithClkE_Btest);
    // commented out nextlut(), here despite no effect, for visual consistency
    //nextlut();
    // #control subpart btestImm-Mux=E = btestimmed
    route(BtestImmed, ImmedSelMaddr_Btest);
    //nextlut();
    // #control subpart btestClkE-Mem = bteststoac
    route(BtestStoac, MdwmMemClkE_Btest);
    //nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('B', "tag Bit Test Instruction, Swapper");

    nextsli();


    // second step, -/ANDC/XOR/OR modify unit with 2 function select lines

    System.out.print("Bit Test 2, Modifier: data " + pos());

    Pin BtestMod_Mem[] = new Pin[DataBits],
        BtestMod_Swap[] = new Pin[DataBits],
        BtestMod_SelMod1[] = new Pin[DataBits],
        BtestMod_SelMod2[] = new Pin[DataBits];

    Pin BtestReg_ClkE[] = new Pin[DataBits/2],
        BtestReg[] = new Pin[DataBits];

    // per data bit: one LUT combines the Mdrm data bus bit (I1) with the
    //   swapper output bit (I2) under the two function select lines (I3, I4),
    //   the FF behind the LUT is the btest arithmetic register, whose
    //   registered output feeds the Btest input of the Mdwm data path
    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      BtestMod_Mem[Bit] = pin_l(PinIn1);
      MdrmDataBus[Bit][MdrmDataPos[Bit]++] = BtestMod_Mem[Bit];
      BtestMod_Swap[Bit] = pin_l(PinIn2);
      route(BtestSwap[Bit], BtestMod_Swap[Bit]);
      BtestMod_SelMod1[Bit] = pin_l(PinIn3);
      BtestMod_SelMod2[Bit] = pin_l(PinIn4);
      // modificat: Nothing 0, Zero I1 ANDC I2, Compl I1 XOR I2, Ones I1 OR I2
      lut(0x0000   &(~I3)&(~I4)|
          I1&(~I2) &  I3 &(~I4)|
          (I1^I2)  &(~I3)&  I4 |
          (I1|I2)  &  I3 &  I4 , "BtestMod"+Bit);
      // btest units arithmetic register, to then write back
      slice(ClockFrom, SysClock);
      // one clock enable pin per pair of bits, taken at the odd bit
      if (Bit%2 == 1) {
        BtestReg_ClkE[Bit/2] = pin_s(PinCe); }
      // take the registered (Q) output, as the Hword unit does for HwordReg:
      //   write-back happens in the later bteststoac state, so the value
      //   stored under ClkE-btestAR is needed, not the combinational LUT output
      BtestReg[Bit] = pin_l(PinOutQ);
      route(BtestReg[Bit], MdwmData_Btest[Bit]);
      nextlut(); }

    System.out.print(", control " + pos());

    // #buffer btest-SelMod1 = IR.OP.4
    //   no logic, just buffer IR bits to reduce fan-out of IR.OP outputs
    //     gives faster instruction decode, slower function select irrelevant
    Pin BtestSelMod1_Irop4 = pin_l(PinIn1);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = BtestSelMod1_Irop4;
    lut(I1, "BtestSelMod1");
    Pin BtestSelMod1 = pin_l(PinOut);
    routem(BtestSelMod1, BtestMod_SelMod1);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #buffer btest-SelMod2 = IR.OP.3
    Pin BtestSelMod2_Irop3 = pin_l(PinIn1);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = BtestSelMod2_Irop3;
    lut(I1, "BtestSelMod2");
    Pin BtestSelMod2 = pin_l(PinOut);
    routem(BtestSelMod2, BtestMod_SelMod2);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control ClkE-btestAR = (btestimmed|btestsecmem)&(IR.OP.3|IR.OP.4)
    //   enable the btest arithmetic register only in the test states and
    //     only when a modification (Z/C/O) is requested, same condition as
    //     used for entering state bteststoac
    Pin BtestClkE_Immed = pin_l(PinIn1);
    route(BtestImmed, BtestClkE_Immed);
    Pin BtestClkE_Secmem = pin_l(PinIn2);
    route(BtestSecmem, BtestClkE_Secmem);
    Pin BtestClkE_Irop3 = pin_l(PinIn3);
    IrmaInstrBus[3][IrmaInstrPos[3]++] = BtestClkE_Irop3;
    Pin BtestClkE_Irop4 = pin_l(PinIn4);
    IrmaInstrBus[4][IrmaInstrPos[4]++] = BtestClkE_Irop4;
    if (DebugDisp == 1) {
      // always set AR, so that the test value is visible
      lut(0xFFFF, "BtestClkE, forced to allways enabled by debug"); }
    else {
      // use all four connected inputs, as stated in the equation above
      lut((I1|I2)&(I3|I4), "BtestClkE"); }
    Pin BtestClkE = pin_l(PinOut);
    routem(BtestClkE, BtestReg_ClkE);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('2', "tag Bit Test Instruction 2, Modifier");

    nextsli();


    // third step, AND data with test pattern and wide-NOR for zero/non-zero

    System.out.print("Bit Test 3, Zero: data " + pos());

    Pin BtestZero_Mem[] = new Pin[DataBits],
        BtestZero_Swap[] = new Pin[DataBits],
        BtestZero[] = new Pin[DataBits];

    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // analog to wide-NOR in Atest, but with an AND in front of NOT
      BtestZero_Mem[Bit] = pin_l(PinIn1);
      MdrmDataBus[Bit][MdrmDataPos[Bit]++] = BtestZero_Mem[Bit];
      BtestZero_Swap[Bit] = pin_l(PinIn2);
      route(BtestSwap[Bit], BtestZero_Swap[Bit]);
      // NOR of only 1 AND per LUT, because 1 LUT per bit, gives an NAND
      lut(~(I1&I2), "BtestZero"+Bit);
      if (Bit == DataLSB) {
        // wide-AND of NANDs, BX = 1, LUTs force 0
        slice(CarryBegin, CarryBegin_FromInBx); }
      lcell(CarryEnable, CarryEnable_Modify);
      slice(CarryValue, CarryValue_Zero);
      if (Bit == DataMSB) {
        // grab wide-NOR carry output for zero test result
        slice(OutBFrom, OutBFrom_Carry);
        // BtestZero as array because of Java compiler bug
        //   aborts with "may not be initialised" error
        //     instead of just warning, and no way to suppress it
        BtestZero[Bit] = pin_l(PinOutB); }
      if (DebugDisp == 1) {
        // show what data was fed into the wide-AND
        slice(ClockFrom, SysClock); }
      nextlut(); }

    System.out.print(", control " + pos());

    // #control subpart btestPC-Mux=PC+1 = btestimmed|btestsecmem
    // #control btestnow = btestPC-Mux=PC+1
    Pin BtestProgSelIncr_Immed = pin_l(PinIn1);
    route(BtestImmed, BtestProgSelIncr_Immed);
    Pin BtestProgSelIncr_Secmem = pin_l(PinIn2);
    route(BtestSecmem, BtestProgSelIncr_Secmem);
    lut(I1|I2, "BtestProgSelIncr");
    Pin BtestProgSelIncr = pin_l(PinOut);
    route(BtestProgSelIncr, ProgSelIncr_Btest);
    Pin BtestNow = BtestProgSelIncr;
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();
    // #control subpart btestClkE-PC = btestnow&((btestzero&IR.OP.7)^IR.OP.6)
    //   skip decision: IR.OP.7 selects the zero test, IR.OP.6 inverts it,
    //     giving Never/Equal/Always/Not Equal for mm = 00/01/10/11
    Pin BtestProgClkE_Now = pin_l(PinIn1);
    route(BtestNow, BtestProgClkE_Now);
    Pin BtestProgClkE_Zero = pin_l(PinIn2);
    // zero flag must come from this unit's own wide-AND carry chain output
    //   (BtestZero), not from the arithmetic test unit (AtestZero)
    route(BtestZero[DataMSB], BtestProgClkE_Zero);
    Pin BtestProgClkE_Irop7 = pin_l(PinIn3);
    IrmaInstrBus[7][IrmaInstrPos[7]++] = BtestProgClkE_Irop7;
    Pin BtestProgClkE_Irop6 = pin_l(PinIn4);
    IrmaInstrBus[6][IrmaInstrPos[6]++] = BtestProgClkE_Irop6;
    lut(I1&((I2&I3)^I4), "BtestProgClkE");
    Pin BtestProgClkE = pin_l(PinOut);
    route(BtestProgClkE, ProgClkE_Btest);
    if (DebugDisp == 1) {
      slice(ClockFrom, SysClock); }
    nextlut();

    System.out.println(", unused " + pos());

    lastlut();
    luttext('3', "tag Bit Test Instruction 3, Zero");

    nextsli();


// ------ 111dddddd.dooo input/output instruction unit section

    // implement the 111dddddd.dooo input/output instructions
    //   8 operations with 128 devices

    // ooo operation, from PDP-10 Reference Manual, page 2-133 to 2-134
    // Mnemonic  Instruction  IR ooo 10 11 12  Action if no Device
    // BLKI      Block In             0  0  0  incr C(E), zero C(C(E)), *
    // DATAI     Data In              0  0  1  zero C(E) 
    // BLKO      Block Out            0  1  0  incr C(E), NOP, *
    // DATAO     Data Out             0  1  1  NOP
    // CONO      Conditions Out       1  0  0  NOP
    // CONI      Conditions In        1  0  1  zero C(E)
    // CONSZ     Condit In Skip Zero  1  1  0  SKIP
    // CONSO     Condit In Skip One   1  1  1  NOP
    // * = if left(C(E)) != 0 SKIP
    // on KS-10 (Unibus IO) this is totally different, ignore that

    // dddddd.d address to select device used

    // --- nothing implemented yet
    // so these do nearly nothing so long there is no IO device attached
    //   and are only used inside drivers, wait until implementing IO devices


// ------ final stuff for routing of buses section

    // route the buses that could not be routed pin at a time

    System.out.print("Routing Buses, Mdrm: data " + pos());

    // this is needed to tidy up from use of single-routing for mdrm data bus
    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // make exactly fitting array, so no NullPointer error on routing
      Pin MdrmDataSinks[] = new Pin[MdrmDataPos[Bit]];
      for (int Pos = 0; Pos < MdrmDataPos[Bit]; Pos++) {
        MdrmDataSinks[Pos] = MdrmDataBus[Bit][Pos]; }
      routem(MdrmData[Bit], MdrmDataSinks);
      // no space actually used, step LUTs just for error message coordinates
      nextlut(); }

    System.out.println(", unused " + pos());

    // no space actually used, step slice just for error message coordinates
    nextsli();


    System.out.print("Routing Buses, Irma: data " + pos());

    // this is needed to tidy up from use of single-routing for instruction bus
    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // make exactly fitting array, so no NullPointer error on routing
      Pin IrmaInstrSinks[] = new Pin[IrmaInstrPos[Bit]];
      for (int Pos = 0; Pos < IrmaInstrPos[Bit]; Pos++) {
        IrmaInstrSinks[Pos] = IrmaInstrBus[Bit][Pos]; }
      routem(IrmaReg[Bit], IrmaInstrSinks);
      nextlut(); }

    System.out.print(", control " + pos());

    Pin IrmaInsaccSinks[] = new Pin[IrmaInsaccPos];
    for (int Pos = 0; Pos < IrmaInsaccPos; Pos++) {
      IrmaInsaccSinks[Pos] = IrmaInsaccBus[Pos]; }
    routem(IrmaInsacc, IrmaInsaccSinks);
    nextlut();

    Pin IrmaInsdecodeSinks[] = new Pin[IrmaInsdecodePos];
    for (int Pos = 0; Pos < IrmaInsdecodePos; Pos++) {
      IrmaInsdecodeSinks[Pos] = IrmaInsdecodeBus[Pos]; }
    routem(IrmaInsdecode, IrmaInsdecodeSinks);
    nextlut();

    System.out.println(", unused " + pos());

    nextsli();


    System.out.print("Routing Buses, Immed: data " + pos());

    // this is needed to tidy up from use of single-routing for immed data bus
    for (int Bit = DataLSB; Bit >= DataMSB; Bit--) {
      // make exactly fitting array, so no NullPointer error on routing
      Pin ImmedDataSinks[] = new Pin[ImmedDataPos[Bit]];
      for (int Pos = 0; Pos < ImmedDataPos[Bit]; Pos++) {
        ImmedDataSinks[Pos] = ImmedDataBus[Bit][Pos]; }
      routem(ImmedData[Bit], ImmedDataSinks);
      nextlut(); }

    System.out.println(", unused " + pos());

    nextsli();


// ------ revision text section

    // place text for revision info, visible in BoardScope or chip viewer

    // place revision text in the last/right column:slice of the FPGA
    lastsli();

    System.out.print("Revision Text: " + pos());

    // get date for revision info text
    Date TextDate = new Date();
    SimpleDateFormat TextDateform = new SimpleDateFormat("yyyy.MM.dd");
    String TextDateString = TextDateform.format(TextDate);

    // generate revision info text
    String TextRev = "pdp10.bit - Neil Franklin - " +
      TextDateString + " - " + DeviceName;

    for (int Char = 0; Char < TextRev.length(); Char++) {
      luttext(TextRev.charAt(Char), "RevisionText"+Char);
      nextlut(); }

    System.out.println(", unused " + pos()); } }
