diff --git a/.gitmodules b/.gitmodules index 4ab49125c06bff68c9c4395eea0c4f900f736d1e..29eca63154d0a8a70cd378682c02419a22f5e7a1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -26,3 +26,6 @@ path = asic_lib_tech url = https://git.soton.ac.uk/soclabs/asic_library_tech branch = main +[submodule "secworks-aes"] + path = secworks-aes + url = https://github.com/secworks/aes.git diff --git a/README.md b/README.md index b56275cd50204c78b042f895e9011fba3c23e5df..33ecccae1022eac37031650a029b30783baf550b 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,35 @@ -# Accelerator System Project +# nanosoc aes128 example Accelerator System Project -This repo is the top-level repository which contains all the relavant IP for integrating your custom hardware accelerator with the SoC Labs provided nanosoc chip design IP in forms of git subrepositories. +This repo is the top-level repository which contains all the relevant IP for integrating a secworks AES128 hardware accelerator with the SoC Labs provided nanosoc chip design IP cloned from `https://git.soton.ac.uk/soclabs/accelerator-project` and adapted as a reference design for 128-bit key AES cipher acceleration. -### Fork this repository +The IP core used in this example is based on the secworks aes project core: https://github.com/secworks/aes secworks-aes + +This design instantiates a custom (AMBA-AHB) wrapper around the AES core to implement a memory-mapped 128-bit AES encrypt/decrypt accelerator that can be used as a software-driven peripheral or a semi-autonomous DMA subsystem when 128-bit keys and variable length data payloads can be set up as scatter/gather descriptor chains for background processing. -In order to use this repository for your own project, we recommend that you fork a version of this repository first. In your forked version of the project you can add you accelerator as a subrepo if you are already using repositories, or alternatively add the source for you accelerator directly to your forked repository.
### Cloning this repository --- -This Repository contains multiple sub-repositories. In order to clone them with this repository, please use the following command: +This Repository contains multiple sub-repositories. In order to clone them with this top-level system repository, please use the following command: + +`git clone --recurse https://git.soton.ac.uk/soclabs/aes-128-project.git` + +then set current directory to the top of this project: + +`cd aes-128-project` -`git clone --recurse https://git.soton.ac.uk/soclabs/accelerator-project.git` +Add the AES IP core (packaged as a fusesoc component): -At this stage you can also add your submodule with: -`git submodule add` +`git submodule add https://github.com/secworks/aes.git secworks-aes` -After doing this you should update the projbranch file to include your repository name (as it appears in .gitmodules) and the branch. This will allow the set_env.sh script to pull in your repository when updates are made +This specific `secworks-aes` directory repository name is included in `.gitmodules` configuration; this ensures the set_env.sh script will pull in IP core repository when updates are made. -At this point you may also like to edit the /env/dependency_env.sh to include your accelerator directory for example: +The /env/dependency_env.sh file is already modified to include the accelerator directory: -`export ACCELERATOR_DIR="$SOCLABS_PROJECT_DIR/accelerator"` +`export ACCELERATOR_DIR="$SOCLABS_PROJECT_DIR/secworks-aes"` -## Setting up the Project Environment +### Setting up the Project Environment --- Every time you wish to run commands in this project, you will need to make sure the set environment script has been run for your current terminal session. This is done by moving to the top-level of the project and running: @@ -41,23 +47,30 @@ Once you have run a `source set_env.sh` in your current terminal, you are then a This runs a git pull on all repositories in your project. 
-## Project Structure +### Project Structure --- The core of the SoC is NanoSoC. This is an example, configurable system that is the main framework. It has many different memory-mapped regions, one of which is designed for the connection of accelerator subsystems called the expansion region. The expansion region is able to instantiate an accelerator_subsystem by default. This means that anyone using NanoSoC as a platform for accelerator experimentation will need to build an `accelerator_subsystem` rtl module. -There is an example file in `/system/src/accelerator_subsystem.v`. You will need to add an instantiation of your top level to this file. The connections at this level are an AHB subordinate, DMA data requist signals, and CPU interupt signals. +This AES128 project example file is instantiated in `/system/src/accelerator_subsystem.v`. This provides the connections at this level for an AHB subordinate or target control register file, Direct Memory Access Controller input and output data (128-bit as 4x32-bit words) request signals to the DMA230 controller used in this design, and interrupt request signals to the NanoSoC CPU (Arm Cortex-M0 microprocessor). + +The DMA230 controller is configured in the project `nanosoc.config` file: + +`DMA_0_PL230_INCLUDE := yes` ### Using the makefiles and FLISTS +--- -Simulation, FPGA implementation, and ASIC synthesis can all be performed from the main makefile in the nanosoc_tech repository. In order for these to work correctly you must make sure the flist files include your accelerator source code. -You can add these to `/flist/project/accelerator`. It is recommended here to use environment variables, the top level of your project will be `$SOCLABS_PROJECT_DIR`. You can include files directly, or include other flists with the `-f` command at the start of the line. +Simulation, FPGA implementation, and ASIC synthesis can all be performed from the main `makefile` in the nanosoc_tech repository.
+This project includes the `/flist/project/accelerator`. This uses the recommended environment variables, the top level of your project will be `$SOCLABS_PROJECT_DIR`. +In this case, the `soclabs_ahb_aes128_ctrl.v` file has explicit include directives to add the secworks-aes src Verilog RTL files. -### htmlgen design visualisation ---- +## htmlgen design visualisation +--- + A tool is provided to generate an html documentation tree to traverse and explore the design hierarchy: `htmlgen -f $SOCLABS_PROJECT_DIR/flist/project/top.flist` @@ -66,49 +79,91 @@ or populates the html/top/build/ directiory. Open `nanosoc_tb.html` to explore from the testbench down in to the design. -### Accelerator Subsystem ---- -`accelerator_subsystem` can either directly contain an accelerator (or multiple) or can instantiate accelerator wrappers which in turn instantiate accelerators. +## Accelerator Subsystem +--- -This module is expected to be found in `system/src/accelerator_subsystem.v`. +This example `accelerator_subsystem` directly contains the aes128 accelerator RTL in the `system/src/` directory, with the wrapper named `system/src/accelerator_subsystem.v` that nanosoc instantiates as the generic wrapper interface. ### Accelerator Wrapper --- -Accelerator wrappers are located in `wrapper/src`. These should instantiate accelerators and can use wrapper components within the `accelerator_wrapper_tech` repository to allow a conversion of valid//ready interfaces to a memory-mapped AHB interface. + +The example AES128 accelerator has a native AMBA 32-bit AHB interface so the wrapper components within the `accelerator_wrapper_tech` repository are not used in this case. + ## Running the simulation +--- + +The simulation scripts are contained in `$SOCLABS_PROJECT_DIR/simulate/socsim/` directory. + +This project runs all the standard nanosoc validation tests provided in the `nanosoc_tech/testcodes` directory to prove the nanosoc microcontroller is functionally operational.
+ +The currently supported simulators are VCS, Xcelium and QuestaSim. The default in the scripts is QuestaSim (run_mti). (Edit the simulator target in nanosoc_tech/system/makefile for the simulator EDA tool used). + +You can also run `sim_%SIMULATOR%` and this will run the simulation from the GUI. + +To run the simulation the 'socsim' command executes the makefile in the 'nanosoc_tech' microcontroller framework. + +To ensure simulation tools and environment are set up correctly a simple testcase should run and produce a `TEST PASSED` message satisfactorily. + +`socsim test_nanosoc TESTNAME=hello ACCELERATOR=yes` + +In fact the simulation and regression scripts all include the `ACCELERATOR=yes` configuration: + +`socsim test_nanosoc TESTNAME=hello` + + +### Accelerator specific validation programs --- -This design instantiates a custom (AMBA-AHB) wrapper around the AES core to implement a memory-mapped 128-bit AES encrypt/decrypt accelerator that can be used as a software-driven peripheral or a semi-autonomous DMA subystem when 128-bit keys and variable length data payloads can be set up as scatter/gather descriptor chains for background processing. +All the standard validation tests for nanosoc should run and pass as normal on the Arm Cortex-M0 processor (as provided in nanosoc_tech/testcodes directory). +Additional specific AES128 accelerator validation programs are provided in the `system/testcodes` directory to be run from the Arm Cortex-M0 processor; +these have been added to the recommended `/system/testcodes/$TESTNAME` directory. -To run the simulation the 'socsim' command executes the makefile in the 'nanosoc_tech' microcontroller framework. (Edit the simulator target in nanosoc_tech/system/makefile for the simulator EDA tool used).
Then use the: +This AES128 project design instantiates a custom (AMBA-AHB) wrapper around the AES core to implement a memory-mapped 128-bit AES encrypt/decrypt accelerator that can be used as a software-driven peripheral or a semi-autonomous DMA subsystem when 128-bit keys and variable length data payloads can be set up as scatter/gather descriptor chains for background processing. -`socsim system_aes128 TESTNAME=aes128_tests` +The software only version of the tests runs on the Arm Cortex-M0 CPU only and uses `memcpy` operations to write 128-bit keys and 128-bit plaintext input data and 128-bit reads to extract the cipher blocks. -This runs the integration test program on the Arm Cortex-M0 processor using the 'system_aes128.sh' script provided in the simulate/socsim directory and the logs are produced in the simulate/sim/system_aes128/logs directory. +Usage is: -## Adding testcodes +`socsim test_nanosoc TESTNAME=aes128_tests_memcpy` -To add your own testcodes to be run from the Arm Cortex-M0 processor, it is recommnded these are added in the `/system/testcodes/$TESTNAME` directory. -To enable the makefiles to find your testcode, you should also add the name of your test to the `/system/testcodes/software_list.txt` file. +The DMA validation tests perform the same functional verification but use the Arm DMA230 DMA controller programming model to test 32-bit, 16-bit and 8-bit bus transactions to transfer keys and data blocks. This validates the memory-mapped IO operations correctly auto-trigger the AES on the final byte transfer of key or data text blocks - which ensures that non-word-aligned data can be processed by the accelerator. -It is recommended that you copy and edit one of the makefiles for compiling your software into .hex files. (for example in `/system/testcodes/adp_v4_cmd_tests/makefile`) You will have to edit the TESTNAME (line 46) and may also need to change some of the compiler options depending on your code.
-You can then either simulate using the `socim test_accelerator TESTNAME=x` or alternatively from the `/nanosoc_tech` directory you can run `make run_%SIM% TESTNAME=x ACCELERATOR=yes` +This runs the integration test program on the Arm Cortex-M0 processor using the 'test_aes128.sh' script provided in the simulate/socsim directory and the logs are produced in the simulate/sim/test_aes128/logs directory. -The currently supported simulators are VCS, Excelium and Questa Sim. Currently we mostly use Quest Sim (run_mti) +The next test validates the DMA Controller IP is correctly installed and instantiated. In cut-down systems (such as the QuickStart nanosoc alternative) there is no DMA controller and this test will fail while the `memcpy` software will run. -You can also run `sim_%SIMULATOR%` and this will run the simulation from the GUI. +`socsim test_nanosoc TESTNAME=aes128_tests_dma230` + + +The full set of accelerator regression tests plus the nanosoc system tests can then be run with: + +`socsim regression_accelerator` + +The logfiles are written to +`$SOCLABS_PROJECT_DIR/simulate/sim/regression_<date_stamp>/` + + +NOTE: To enable the makefiles to find the additional testcode programs, the extra aes128 test names have been added to the `/system/testcodes/software_list.txt` file, as recommended. -When using the makefile, you must include the `ACCELERATOR=yes` directive to include your accelerator ## FPGA Builds +--- + +The FPGA implementation instantiates a full `nanosoc_chip` level of hierarchy - the pad-ring final chip level is replaced by an FPGA framework that supports the provision of clock, reset, serial wire debug and host communication channels, to support validation of the chip design much like the simulation top-level testbench.
+ +We currently have build files for the ARM MPS3 "Microcontroller Prototyping Platform", and four main-stream Xilinx ZYNQ-based boards that have full support for running the PYNQ Python & Jupyter notebook as network appliances: ZCU104, PYNQ-Z2, KRIA KR260 and KRIA KV260. + +The acceptable targets are defined in the `/nanosoc_tech/fpga/makefile.targets` and are `mps3, zcu104, z2, kr260, kv260` currently. + +To build the bitfiles you can run the `make build_fpga FPGA=%target% ACCELERATOR=yes` from the `$SOCLABS_PROJECT_DIR/nanosoc_tech/` directory. +(The `ACCELERATOR=yes` does have to be explicit for all make targets run in the `nanosoc_tech` directory) -We currently have build files for the ARM MPS3, PYNQ ZCU104, PYNQ Z2, KRIA KR260 and KRIA KV260. To build the bitfiles you can run the `make build_fpga FPGA=%target% ACCELERATOR=yes` from the `/nanosoc_tech/` directory. -The acceptable targets are defined in the `/nanosoc_tech/fpga/makefile.targets` and are `mps3, zcu104, z2, kr260, kv260` +This script will run the vivado build scripts (updated to AMD/Xilinx Vivado 2024.1). The output files required to configure the FPGA (Overlay in the case of the PYNQ targets) are built in the `$SOCLABS_PROJECT_DIR/imp/FPGA/targets/` directory. -This script will run the vivado build scripts. The output from this will be in the `/imp/FPGA` directory. ## ASIC Synthesis +--- To run the ASIC synthesis you will first need to define a `$PHYS_IP` environment variable. This should point to the uncompressed Arm bundles for the TSMC 65nm LP node. @@ -117,4 +172,4 @@ For a Cadence synthesis flow use: 2. Run `make flist_genus_nanosoc ACCELERATOR=yes ASIC=yes` which will generate the top flist for the genus synthesiser tool 3. Run `make syn_genus ACCELERATOR=yes ASIC=yes` which will run the synthesis -The output from the synthesis will be in the `/imp/ASIC` directory. +The output from the synthesis will be in the `$SOCLABS_PROJECT_DIR/imp/ASIC` directory.
diff --git a/asic_lib_tech b/asic_lib_tech index ddda4a64c07b133b6d60c539ac403fb6644bb55f..57e2056366551367fedfdd983eb659dc2076a8b1 160000 --- a/asic_lib_tech +++ b/asic_lib_tech @@ -1 +1 @@ -Subproject commit ddda4a64c07b133b6d60c539ac403fb6644bb55f +Subproject commit 57e2056366551367fedfdd983eb659dc2076a8b1 diff --git a/flist/project/accelerator.flist b/flist/project/accelerator.flist index b531759cd136c8d8d4db69280441aae860f011dd..b6263b759532b3427621e098ae8b894559a59314 100644 --- a/flist/project/accelerator.flist +++ b/flist/project/accelerator.flist @@ -13,11 +13,9 @@ // ============= Accelerator Module search path ============= -// ! Point this to your Accelerator RTL +// AES128 project Accelerator RTL +incdir+$(ACCELERATOR_DIR)/src/rtl +$(SOCLABS_PROJECT_DIR)/system/src/soclabs_ahb_aes128_ctrl.v -// ! Point this to your Wrapper RTL -$(SOCLABS_PROJECT_DIR)/wrapper/src/soclabs_ahb_aes128_ctrl.v - -// ! Point this to your Subsystem RTL +// nanosoc generic wrapper Subsystem RTL wrapper $(SOCLABS_PROJECT_DIR)/system/src/accelerator_subsystem.v diff --git a/flist/project/system.flist b/flist/project/system.flist index e7f9758e2798f4cf897bb85b03fd0c5641ddc4f7..315b59b1a2ef4b720ba4f6eb78adb77040568c0a 100644 --- a/flist/project/system.flist +++ b/flist/project/system.flist @@ -28,12 +28,5 @@ // - FPGA sram -f $(SOCLABS_FPGA_LIB_TECH_DIR)/flist/fpga_lib_mem_ip.flist -// - Accelerator Wrapper IP -//-f $(SOCLABS_WRAPPER_TECH_DIR)/flist/accelerator_wrapper_ip.flist - // - Bootrom Code RTL $(SOCLABS_PROJECT_DIR)/system/src/bootrom/verilog/bootrom.v - -// the chosen DMA controller -//-f $(SOCLABS_SLDMA350_TECH_DIR)/flist/sldma350_ahb.flist --f $(SOCLABS_SLDMA230_TECH_DIR)/flist/sldma230_ip.flist diff --git a/flist/project/top.flist b/flist/project/top.flist index 0b24f7c23e3f9d4ba36a066719a7317c788e4481..e2fde9a47da2a25438d06fe187f37e0f9f5ee3f2 100644 --- a/flist/project/top.flist +++ b/flist/project/top.flist @@ -33,7 +33,3 @@ // - CMSDK IP -f 
$(SOCLABS_PROJECT_DIR)/flist/ahb/ahb_ip.flist -f $(SOCLABS_PROJECT_DIR)/flist/apb/apb_ip.flist - -// the chosen DMA controller -//-f $(SOCLABS_SLDMA350_TECH_DIR)/flist/sldma350_ahb.flist --f $(SOCLABS_SLDMA230_TECH_DIR)/flist/sldma230_ip.flist \ No newline at end of file diff --git a/flist/project/top_ASIC.flist b/flist/project/top_ASIC.flist index dd1da6ca6c7f922ee1eedad31585c24e5ee0c8a1..0ecaa8e006dc0e5aea754877b7f5cd9eb5af1b79 100644 --- a/flist/project/top_ASIC.flist +++ b/flist/project/top_ASIC.flist @@ -34,5 +34,5 @@ $(SOCLABS_PROJECT_DIR)/system/src/defines/gen_defines.v // the chosen DMA controller --f $(SOCLABS_SLDMA350_TECH_DIR)/flist/sldma350_ahb.flist +//-f $(SOCLABS_SLDMA350_TECH_DIR)/flist/sldma350_ahb.flist //-f $(SOCLABS_SLDMA230_TECH_DIR)/flist/sldma230_ip.flist diff --git a/flist/project/top_FPGA.flist b/flist/project/top_FPGA.flist new file mode 100644 index 0000000000000000000000000000000000000000..a836797642a380d497e9ade0eb8902dd8ef8b638 --- /dev/null +++ b/flist/project/top_FPGA.flist @@ -0,0 +1,32 @@ +//----------------------------------------------------------------------------- +// Project Top-level Filelist System Filelist +// A joint work commissioned on behalf of SoC Labs, under Arm Academic Access license. 
+// +// Contributors +// +// David Mapstone (d.a.mapstone@soton.ac.uk) +// +// Copyright (C) 2021-3, SoC Labs (www.soclabs.org) +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// Abstract : Verilog Command File for Top-level Accelerator System +//----------------------------------------------------------------------------- + +// DESIGN_TOP nanosoc_chip + +// ============= Verilog library extensions =========== ++libext+.v+.vlib + +// ============= System Filelist ========================= +// - Defines RTL ++incdir+$(SOCLABS_PROJECT_DIR)/system/src/defines + +-f $(SOCLABS_PROJECT_DIR)/flist/project/system.flist + +// ============= Arm-IP Specific Filelists ========================= +// - NanoSoC Chip IP +-f $(SOCLABS_NANOSOC_TECH_DIR)/flist/nanosoc_FPGA.flist + +// - CMSDK IP +-f $(SOCLABS_PROJECT_DIR)/flist/ahb/ahb_ip.flist +-f $(SOCLABS_PROJECT_DIR)/flist/apb/apb_ip.flist diff --git a/nanosoc.config b/nanosoc.config new file mode 100644 index 0000000000000000000000000000000000000000..d206a68bebd8c013a950fca1e7123d955462704b --- /dev/null +++ b/nanosoc.config @@ -0,0 +1,40 @@ +#----------------------------------------------------------------------------- +# NanoSoC Configuration file +# A joint work commissioned on behalf of SoC Labs, under Arm Academic Access license. +# +# Copyright (C) 2021-3, SoC Labs (www.soclabs.org) +#----------------------------------------------------------------------------- + +#### IP Configuration +# !!EDIT this to point to the relevant logical directories of IP +ARM_CORSTONE_101_DIR ?= $(ARM_IP_LIBRARY_PATH)/latest/Corstone-101/logical +ARM_CORTEX_M0_DIR ?= $(ARM_IP_LIBRARY_PATH)/latest/Cortex-M0/logical + +# DMA_xxx_INCLUDE (yes or leave blank) +DMA_0_PL230_INCLUDE := yes +DMA_1_PL230_INCLUDE := +DMA_DMA350_INCLUDE := + +# DMA 350 options set to yes for one of them +# !!
MAKE sure you run the correct configuration in the SLDMA350 directory +# Small configuration of DMA, 2 channels, no stream interface, no extended features +# Default configuration of DMA, 2 channels, stream interface, extended features +# Big configuration of DMA, 3 channels, stream interface, extended features +DMA350_SMALL := +DMA350_DEFAULT := +DMA350_BIG := + +# ADC Include (yes or leave blank) +ADC_0_INCLUDE:= +ADC_1_INCLUDE:= +ADC_2_INCLUDE:= +ADC_3_INCLUDE:= + +SNPS_PVT_TS_0_INCLUDE:= +SNPS_PVT_TS_1_INCLUDE:= +SNPS_PVT_TS_2_INCLUDE:= +SNPS_PVT_TS_3_INCLUDE:= +SNPS_PVT_TS_4_INCLUDE:= +SNPS_PVT_TS_5_INCLUDE:= +SNPS_PVT_PD_0_INCLUDE:= +SNPS_PVT_VM_0_INCLUDE:= \ No newline at end of file diff --git a/nanosoc_tech b/nanosoc_tech index 167f66a58e24c0cbd7f5641d5ee38957560b4245..9209e0a964a7d26613b91651d429e8d4f072cac5 160000 --- a/nanosoc_tech +++ b/nanosoc_tech @@ -1 +1 @@ -Subproject commit 167f66a58e24c0cbd7f5641d5ee38957560b4245 +Subproject commit 9209e0a964a7d26613b91651d429e8d4f072cac5 diff --git a/secworks-aes b/secworks-aes new file mode 160000 index 0000000000000000000000000000000000000000..43050a194c53576b53066015cdc92d8a664e74c5 --- /dev/null +++ b/secworks-aes @@ -0,0 +1 @@ +Subproject commit 43050a194c53576b53066015cdc92d8a664e74c5 diff --git a/simulate/socsim/regression_nanosoc.sh b/simulate/socsim/regression_nanosoc.sh index c1236bf0b3202ebca47bd492b52c68037b790678..737c8ae3ac20a8dadf5f225d81560b2d081223f9 100755 --- a/simulate/socsim/regression_nanosoc.sh +++ b/simulate/socsim/regression_nanosoc.sh @@ -26,6 +26,6 @@ cd $SOCLABS_PROJECT_DIR/simulate/sim/$SIM_NAME echo ${2} make -C $SOCLABS_NANOSOC_TECH_DIR regression_mti \ SIM_DIR=$SIM_DIR \ - ACCELERATOR=no \ + ACCELERATOR=yes \ ${@:2} diff --git a/simulate/socsim/test_nanosoc.sh b/simulate/socsim/test_nanosoc.sh index 45546a057ef5e1f0d41791ebfd4d3916adafead4..1bfacd413d1a7326dee7254953312d535c2a755a 100755 --- a/simulate/socsim/test_nanosoc.sh +++ b/simulate/socsim/test_nanosoc.sh @@ -26,6 +26,6 @@ 
cd $SOCLABS_PROJECT_DIR/simulate/sim/$SIM_NAME echo ${2} make -C $SOCLABS_NANOSOC_TECH_DIR run_mti \ SIM_DIR=$SIM_DIR \ - ACCELERATOR=no \ + ACCELERATOR=yes \ ${@:2} diff --git a/soctools_flow b/soctools_flow index 1fec0158c729f99888c692dc85696069be8e4006..3e339941d8b4536f4bd096fc4191a4e36e38978a 160000 --- a/soctools_flow +++ b/soctools_flow @@ -1 +1 @@ -Subproject commit 1fec0158c729f99888c692dc85696069be8e4006 +Subproject commit 3e339941d8b4536f4bd096fc4191a4e36e38978a diff --git a/system/src/soclabs_ahb_aes128_ctrl.v b/system/src/soclabs_ahb_aes128_ctrl.v new file mode 100644 index 0000000000000000000000000000000000000000..63b2bc31851807b3e5122ec4a1e9f2872d41a21c --- /dev/null +++ b/system/src/soclabs_ahb_aes128_ctrl.v @@ -0,0 +1,583 @@ + //----------------------------------------------------------------------------- +// top-level soclabs example AHB interface +// A joint work commissioned on behalf of SoC Labs, under Arm Academic Access license. +// +// Contributors +// +// David Flynn (d.w.flynn@soton.ac.uk) +// +// Copyright (C) 2023, SoC Labs (www.soclabs.org) +//----------------------------------------------------------------------------- + +module soclabs_ahb_aes128_ctrl( +// ------------------------------------------------------- +// MCU interface +// ------------------------------------------------------- + input wire ahb_hclk, // Clock + input wire ahb_hresetn, // Reset + input wire ahb_hsel, // Device select + input wire [15:0] ahb_haddr16, // Address for byte select + input wire [1:0] ahb_htrans, // Transfer control + input wire [2:0] ahb_hsize, // Transfer size + input wire [3:0] ahb_hprot, // Protection control + input wire ahb_hwrite, // Write control + input wire ahb_hready, // Transfer phase done + input wire [31:0] ahb_hwdata, // Write data + output wire ahb_hreadyout, // Device ready + output wire [31:0] ahb_hrdata, // Read data output + output wire ahb_hresp, // Device response +// stream data + output wire drq_ipdma128, // (to) DMAC input 
burst request + input wire dlast_ipdma128,// (from) DMAC input burst end (last transfer) + output wire drq_opdma128, // (to) DMAC output dma burst request + input wire dlast_opdma128,// (from) DMAC output burst end (last transfer) + output wire irq_key128, + output wire irq_ip128, + output wire irq_op128, + output wire irq_error, + output wire irq_merged // combined interrrupt request (to CPU) + ); + + //---------------------------------------------------------------- + // Internal parameter definitions. + //---------------------------------------------------------------- + +///typedef struct { +/// __I uint32_t CORE_NAME[2]; /* 0x0000-0007 */ +/// __I uint32_t CORE_VERSION; /* 0x0008-000B */ +/// uint32_t RESRV0C; /* 0x000C */ +/// __IO uint32_t CTRL; /* 0x0010 */ +/// __O uint32_t CTRL_SET; /* 0x0014 */ +/// __O uint32_t CTRLL_CLR; /* 0x0018 */ +/// __I uint32_t STATUS; /* 0x001c */ +/// __IO uint32_t QUAL; /* 0x0020 */ +/// uint32_t RESRV24[3]; /* 0x0024 - 2F*/ +/// __IO uint32_t DRQ_MSK; /* 0x0030 */ +/// __O uint32_t DRQ_MSK_SET; /* 0x0034 */ +/// __O uint32_t DRQ_MSK_CLR; /* 0x0038 */ +/// __I uint32_t DRQ_STATUS; /* 0x003C */ +/// __IO uint32_t IRQ_MSK; /* 0x0040 */ +/// __O uint32_t IRQ_MSK_SET; /* 0x0044 */ +/// __O uint32_t IRQ_MSK_CLR; /* 0x0048 */ +/// __I uint32_t IRQ_STATUS; /* 0x004C */ +/// uint32_t RESRV50[4076]; /* 0x0050-0x3FFC (4096-20 words) */ +/// __IO uint8_t KEY128[0x4000]; /* 0x4000-7FFF (0x3FFF is last alias) */ +/// __IO uint8_t TXTIP128[0x4000]; /* 0x8000-BFFF (0x3FFF is last alias) */ +/// __I uint8_t TXTOP128[0x4000]; /* 0xC000-FFFF (0x3FFF is last alias) */ +///} AES128_TypeDef; + + +// CORE ID + localparam ADDR_CORE_NAME0 = 16'h0000; + localparam ADDR_CORE_NAME1 = 16'h0004; + localparam ADDR_CORE_VERSION= 16'h0008; + localparam CORE_NAME0 = 32'h61657331; // "aes1" + localparam CORE_NAME1 = 32'h32382020; // "28 " + localparam CORE_VERSION = 32'h302e3031; // "0.01" + +// CTRL control register with bit-set/bit-clear options + 
localparam ADDR_CTRL = 16'h0010; + localparam ADDR_CTRL_SET = 16'h0014; + localparam ADDR_CTRL_CLR = 16'h0018; + localparam CTRL_REG_WIDTH = 8; + localparam CTRL_BIT_MAX = (CTRL_REG_WIDTH-1); + localparam CTRL_KEY_REQ_BIT = 0; + localparam CTRL_IP_REQ_BIT = 1; + localparam CTRL_OP_REQ_BIT = 2; + localparam CTRL_ERR_REQ_BIT = 3; + localparam CTRL_KEYOK_BIT = 4; + localparam CTRL_VALID_BIT = 5; + localparam CTRL_BYPASS_BIT = 6; + localparam CTRL_ENCODE_BIT = 7; +// STAT status regisyer + localparam ADDR_STAT = 16'h001c; + localparam STAT_REG_WIDTH = 8; + localparam STAT_BIT_MAX = (STAT_REG_WIDTH-1); + localparam STAT_KEYREQ_BIT = 0; + localparam STAT_INPREQ_BIT = 1; + localparam STAT_OUTREQ_BIT = 2; + localparam STAT_ERROR_BIT = 3; + localparam STAT_KEYOK_BIT = 4; + localparam STAT_VALID_BIT = 5; + +// QUAL qualifier field + localparam ADDR_QUAL = 16'h0020; + localparam QUAL_REG_WIDTH = 32; + localparam QUAL_BIT_MAX = (QUAL_REG_WIDTH-1); + +// DREQ DMAC request control with bit-set/bit-clear options + localparam ADDR_DREQ = 16'h0030; + localparam ADDR_DREQ_SET = 16'h0034; + localparam ADDR_DREQ_CLR = 16'h0038; + localparam ADDR_DREQ_ACT = 16'h003c; + localparam DREQ_REG_WIDTH = 3; + localparam DREQ_BIT_MAX = (DREQ_REG_WIDTH-1); + localparam REQ_KEYBUF_BIT = 0; + localparam REQ_IP_BUF_BIT = 1; + localparam REQ_OP_BUF_BIT = 2; + +// IREQ CPU interrupt request control with bit-set/bit-clear options + localparam ADDR_IREQ = 16'h0040; + localparam ADDR_IREQ_SET = 16'h0044; + localparam ADDR_IREQ_CLR = 16'h0048; + localparam ADDR_IREQ_ACT = 16'h004c; + localparam IREQ_REG_WIDTH = 4; + localparam IREQ_BIT_MAX = (IREQ_REG_WIDTH-1); + localparam REQ_ERROR_BIT = 3; + + localparam ADDR_KEY_BASE = 16'h4000; + localparam ADDR_KEY0 = 16'h4000; + localparam ADDR_KEY3 = 16'h400c; + localparam ADDR_KEY7 = 16'h401c; + + localparam ADDR_IBUF_BASE = 16'h8000; + localparam ADDR_IBUF_0 = 16'h8000; + localparam ADDR_IBUF_3 = 16'h800c; + + localparam ADDR_OBUF_BASE = 16'hc000; + localparam 
ADDR_OBUF_3 = 16'hc00c; + + + // -------------------------------------------------------------------------- + // Internal regs/wires + // -------------------------------------------------------------------------- + + reg [15:0] addr16_r; + reg sel_r; + reg wcyc_r; + reg rcyc_r; + reg [3:0] byte4_r; + + wire key128_load_ack; + wire ip128_load_ack; + wire op128_load_ack; + + // -------------------------------------------------------------------------- + // AHB slave byte buffer interface, support for unaligned data transfers + // -------------------------------------------------------------------------- + + wire [1:0] byt_adr = ahb_haddr16[1:0]; + // generate next byte enable decodes for Word/Half/Byte CPU/DMA accesses + wire [3:0] byte_nxt; + assign byte_nxt[0] = (ahb_hsize[1])|((ahb_hsize[0])&(!byt_adr[1]))|(byt_adr[1:0]==2'b00); + assign byte_nxt[1] = (ahb_hsize[1])|((ahb_hsize[0])&(!byt_adr[1]))|(byt_adr[1:0]==2'b01); + assign byte_nxt[2] = (ahb_hsize[1])|((ahb_hsize[0])&( byt_adr[1]))|(byt_adr[1:0]==2'b10); + assign byte_nxt[3] = (ahb_hsize[1])|((ahb_hsize[0])&( byt_adr[1]))|(byt_adr[1:0]==2'b11); + + // de-pipelined registered access signals + always @(posedge ahb_hclk or negedge ahb_hresetn) + if (!ahb_hresetn) + begin + addr16_r <= 16'h0000; + sel_r <= 1'b0; + wcyc_r <= 1'b0; + rcyc_r <= 1'b0; + byte4_r <= 4'b0000; + end else if (ahb_hready) + begin + addr16_r <= (ahb_hsel & ahb_htrans[1]) ? ahb_haddr16 : addr16_r; + sel_r <= (ahb_hsel & ahb_htrans[1]); + wcyc_r <= (ahb_hsel & ahb_htrans[1] & ahb_hwrite); + rcyc_r <= (ahb_hsel & ahb_htrans[1] & !ahb_hwrite); + byte4_r <= (ahb_hsel & ahb_htrans[1]) ? 
byte_nxt[3:0] : 4'b0000; + end + + +// pipelined "early" last access decodes, for PL230 dma_ack timing to deassert dma requests +// wire ahb128_last = ahb_hsel & ahb_htrans[1] & ahb_hready & ahb_haddr16[3] & ahb_haddr16[2] & byte_nxt[3]; +// wire ahb128_wlast = ahb_last & ahb_hwrite & |ahb_haddr[15:14]; // address phase of last write transfer +// wire ahb128_rlast = ahb_last & !ahb_hwrite & |ahb_haddr[15:14]; // address phase of last read transfer + + wire wlast128 = |ahb_haddr16[15:14] & addr16_r[3] & addr16_r[2] & byte4_r[3] & wcyc_r; // write last pulse + wire rlast128 = &ahb_haddr16[15:14] & addr16_r[3] & addr16_r[2] & byte4_r[3] & rcyc_r; // read last pulse + + //---------------------------------------------------------------- + // API register state and wiring + // + //---------------------------------------------------------------- + + reg [CTRL_BIT_MAX:0] control; + reg [QUAL_BIT_MAX:0] param; + reg [DREQ_BIT_MAX:0] drq_enable; + reg [IREQ_BIT_MAX:0] irq_enable; + + wire [STAT_BIT_MAX:0] status; + wire [DREQ_BIT_MAX:0] drq_active; + wire [IREQ_BIT_MAX:0] irq_active; + + wire [31:0] rd_keybuf; + wire [31:0] rd_ipbuf; + wire [31:0] rd_opbuf; + + //---------------------------------------------------------------- + // API write decoder + // + //---------------------------------------------------------------- + + wire sel_mode = sel_r & (addr16_r[15: 8] == 0); + wire sel_keybuf = sel_r & (addr16_r[15:14] == 1); + wire sel_ipbuf = sel_r & (addr16_r[15:14] == 2); + wire sel_opbuf = sel_r & (addr16_r[15:14] == 3); +// add address map "last" transfer signalling when last (byte) of alias map is written + wire alast_key128 = sel_keybuf & wcyc_r & (&addr16_r[13:2]) & byte4_r[3]; + wire alast_ip128 = sel_ipbuf & wcyc_r & (&addr16_r[13:2]) & byte4_r[3]; + wire alast_op128 = sel_opbuf & rcyc_r & (&addr16_r[13:2]) & byte4_r[3]; + + always @(posedge ahb_hclk or negedge ahb_hresetn) + if (!ahb_hresetn) begin + control <= {CTRL_REG_WIDTH{1'b0}}; + param <= 
{QUAL_REG_WIDTH{1'b0}}; + drq_enable <= {DREQ_REG_WIDTH{1'b0}}; + irq_enable <= {IREQ_REG_WIDTH{1'b0}}; + end + else if (sel_mode & wcyc_r & byte4_r[0]) + case ({addr16_r[15:2],2'b00}) + ADDR_CTRL : control <= ahb_hwdata[CTRL_BIT_MAX:0]; // overwrite ctl reg + ADDR_CTRL_SET: control <= ahb_hwdata[CTRL_BIT_MAX:0] | control; // bit set ctl mask pattern + ADDR_CTRL_CLR: control <= ~ahb_hwdata[CTRL_BIT_MAX:0] & control; // bit clear ctl mask pattern + ADDR_QUAL : param <= ahb_hwdata[QUAL_BIT_MAX:0]; // write qual pattern + ADDR_DREQ : drq_enable <= ahb_hwdata[DREQ_BIT_MAX:0]; // overwrite dreq reg + ADDR_DREQ_SET: drq_enable <= ahb_hwdata[DREQ_BIT_MAX:0] | drq_enable; // bit set dreq mask pattern + ADDR_DREQ_CLR: drq_enable <= ~ahb_hwdata[DREQ_BIT_MAX:0] & drq_enable; // bit clear dreq mask pattern + ADDR_IREQ : irq_enable <= ahb_hwdata[IREQ_BIT_MAX:0]; // overwrite ireq reg + ADDR_IREQ_SET: irq_enable <= ahb_hwdata[IREQ_BIT_MAX:0] | irq_enable; // bit set ireq mask pattern + ADDR_IREQ_CLR: irq_enable <= ~ahb_hwdata[IREQ_BIT_MAX:0] & irq_enable; // bit clear ireq mask pattern + default: ; + endcase + else if (sel_keybuf & wcyc_r & (dlast_ipdma128 | alast_key128)) // key terminate + drq_enable[0] <= 1'b0; + else if (sel_ipbuf & wcyc_r & (dlast_ipdma128 | alast_ip128)) // ip-buffer terminate + drq_enable[1] <= 1'b0; + else if (sel_opbuf & rcyc_r & (dlast_opdma128 | alast_op128)) // op-buffer complete + drq_enable[2] <= 1'b0; + + //---------------------------------------------------------------- + // API read decoder + // + //---------------------------------------------------------------- + +reg [31:0] rdata32; // mux read data + + always @* + begin : read_decoder + rdata32 = 32'hbad0bad; + if (sel_r & rcyc_r) + case ({addr16_r[15:2],2'b00}) + ADDR_CORE_NAME0 : rdata32 = CORE_NAME0; + ADDR_CORE_NAME1 : rdata32 = CORE_NAME1; + ADDR_CORE_VERSION : rdata32 = CORE_VERSION; + ADDR_CTRL : rdata32 = {{(32-CTRL_REG_WIDTH){1'b0}}, control}; + ADDR_STAT : rdata32 = 
{{(32-STAT_REG_WIDTH){1'b0}}, status}; + ADDR_QUAL : rdata32 = {{(32-QUAL_REG_WIDTH){1'b0}}, param}; + ADDR_DREQ : rdata32 = {{(32-DREQ_REG_WIDTH){1'b0}}, drq_enable}; + ADDR_DREQ_ACT : rdata32 = {{(32-DREQ_REG_WIDTH){1'b0}}, drq_active}; + ADDR_IREQ : rdata32 = {{(32-IREQ_REG_WIDTH){1'b0}}, irq_enable}; + ADDR_IREQ_ACT : rdata32 = {{(32-IREQ_REG_WIDTH){1'b0}}, irq_active}; + default: + if (sel_keybuf) rdata32 = rd_keybuf; + else if (sel_ipbuf) rdata32 = rd_ipbuf; + else if (sel_opbuf) rdata32 = rd_opbuf; + endcase + end // read_decoder + + assign ahb_hrdata = rdata32; + + assign ahb_hreadyout = 1'b1; // zero wait state interface + assign ahb_hresp = 1'b0; + + // -------------------------------------------------------------------------- + // Key Input Buffer - keybuf + // -------------------------------------------------------------------------- + + wire [127:0] key128_be; + + soclabs_iobuf_reg128 + #(.WRITE_ONLY (1), + .WRITE_ZPAD (0)) + u_reg128_key + ( + .clk (ahb_hclk ), // Clock + .rst_b (ahb_hresetn ), // Reset + .sel_r (sel_keybuf ), // Bank decode select + .wcyc_r (wcyc_r ), // Write cycle (wdata32 valid) + .rcyc_r (rcyc_r ), // Read cycle (return rdata32) + .word2_r (addr16_r[3:2] ), // Address for word select + .byte4_r (byte4_r[3:0] ), // Byte select decoded (up to 4 enabled) + .wdata32 (ahb_hwdata[31:0]), // Write data (byte lane qualified) + .rdata32 (rd_keybuf ), // Read data output + .dma128_ack (key128_load_ack ), // DMA burst acknowledge + .out128_le ( ), // Little-Endian 128-bit value (unconnected) + .out128_be (key128_be ) // Big-Endian 128-bit value + ); + + // -------------------------------------------------------------------------- + // Data Input Buffer - ipbuf + // -------------------------------------------------------------------------- + + wire [127:0] ip128_le; + wire [127:0] ip128_be; + + soclabs_iobuf_reg128 + #(.WRITE_ONLY (0), + .WRITE_ZPAD (1)) + u_reg128_ip + ( + .clk (ahb_hclk ), // Clock + .rst_b (ahb_hresetn ), // Reset + .sel_r (sel_ipbuf ), 
// Bank decode select + .wcyc_r (wcyc_r ), // Write cycle (wdata32 valid) + .rcyc_r (rcyc_r ), // Read cycle (return rdata32) + .word2_r (addr16_r[3:2] ), // Address for word select + .byte4_r (byte4_r[3:0] ), // Byte select decoded (up to 4 enabled) + .wdata32 (ahb_hwdata[31:0]), // Write data (byte lane qualified) + .rdata32 (rd_ipbuf ), // Read data output + .dma128_ack (ip128_load_ack ), // DMA burst acknowledge + .out128_le (ip128_le ), // Little-Endian 128-bit value + .out128_be (ip128_be ) // Big-Endian 128-bit value + ); + + // -------------------------------------------------------------------------- + // Data Output Buffer - opbuf + // -------------------------------------------------------------------------- + + wire [127:0] op128_be; + wire [127:0] op128_muxed = (control[CTRL_BYPASS_BIT]) ? ip128_be : op128_be; + + wire [31:0] op_slice32 [0:3]; + assign op_slice32[3] = {op128_muxed[ 7: 0],op128_muxed[ 15: 8],op128_muxed[ 23: 16],op128_muxed[ 31: 24]}; + assign op_slice32[2] = {op128_muxed[ 39: 32],op128_muxed[ 47: 40],op128_muxed[ 55: 48],op128_muxed[ 63: 56]}; + assign op_slice32[1] = {op128_muxed[ 71: 64],op128_muxed[ 79: 72],op128_muxed[ 87: 80],op128_muxed[ 95: 88]}; + assign op_slice32[0] = {op128_muxed[103: 96],op128_muxed[111:104],op128_muxed[119:112],op128_muxed[127:120]}; + + // 32-bit addressed read data + assign rd_opbuf = op_slice32[addr16_r[3:2]]; + + assign op128_load_ack = (sel_opbuf & rcyc_r & addr16_r[3] & addr16_r[2] & byte4_r[3]); + + // -------------------------------------------------------------------------- + // example aes128 engine timing + // -------------------------------------------------------------------------- + // -------------------------------------------------------------------------- + // AES-specific control interface + // -------------------------------------------------------------------------- + +wire aes128_encode = control[CTRL_ENCODE_BIT]; +wire aes256_keysize = 1'b0; + +wire aes_keyloaded_pulse = 
key128_load_ack; // pulse on last byte load of key128 +wire aes_dataloaded_pulse= ip128_load_ack; // pulse on last byte load of text128 +wire aes_ready; +wire aes_valid; + +// state machine control +reg aes_ready_del; +reg aes_init; +reg aes_next; +reg aes_key_busy; +reg aes_key_rdy; +reg aes_res_busy; +reg aes_res_rdy; +reg aes_err; + + always @(posedge ahb_hclk or negedge ahb_hresetn) + if (!ahb_hresetn) begin + aes_ready_del <= 1'b0; + aes_init <= 1'b0; + aes_next <= 1'b0; + aes_key_busy <= 1'b0; + aes_key_rdy <= 1'b0; + aes_res_busy <= 1'b0; + aes_res_rdy <= 1'b0; + aes_err <= 1'b0; + end else begin + aes_ready_del <= aes_ready; // delay for rising edge detect + aes_init <= aes_keyloaded_pulse; + aes_next <= aes_dataloaded_pulse; + aes_key_busy <= (aes_init) | (aes_key_busy & !(aes_ready & !aes_ready_del)); // hold until key expansion done + aes_key_rdy <= (aes_key_busy & aes_ready & !aes_ready_del) // expanded key ready + | (aes_key_rdy & !(sel_keybuf & wcyc_r)); // hold until any key update + aes_res_busy <= (aes_next) | (aes_res_busy & !(aes_ready & !aes_ready_del)); // hold until block processing done + aes_res_rdy <= (aes_res_busy & aes_ready & !aes_ready_del) // block ready + | (aes_res_rdy & !op128_load_ack); // hold until output transferred + aes_err <= (!aes_key_rdy & ((sel_ipbuf & wcyc_r) | (sel_opbuf & rcyc_r))) + | (aes_err & !(sel_keybuf & wcyc_r)); + end + + assign drq_active[REQ_KEYBUF_BIT] = control[CTRL_KEY_REQ_BIT] & (!aes_keyloaded_pulse & !aes_init & !aes_key_busy & !aes_key_rdy); + assign drq_active[REQ_IP_BUF_BIT] = control[CTRL_IP_REQ_BIT] & (!aes_dataloaded_pulse & !aes_next & !aes_res_busy & !aes_res_rdy & aes_key_rdy); + assign drq_active[REQ_OP_BUF_BIT] = control[CTRL_OP_REQ_BIT] & (!aes_res_busy & aes_res_rdy); + +// input DMA channel shared by Key and Data-In + assign drq_ipdma128 = (drq_enable[REQ_KEYBUF_BIT] & drq_active[REQ_KEYBUF_BIT] & !wlast128) // if key DMA enabled + | (drq_enable[REQ_IP_BUF_BIT] & drq_active[REQ_IP_BUF_BIT] 
& !wlast128) // if ip128 DMA requested + ; + +// output DMA channel for Data-Out + assign drq_opdma128 = (drq_enable[REQ_OP_BUF_BIT] & drq_active[REQ_OP_BUF_BIT] & !rlast128); // if op128 DMA requested + +// and Interrupt requests are masked out if corresponding DMA requests are enabled + assign irq_active[REQ_KEYBUF_BIT] = drq_active[REQ_KEYBUF_BIT] & !drq_enable[REQ_KEYBUF_BIT]; + assign irq_active[REQ_IP_BUF_BIT] = drq_active[REQ_IP_BUF_BIT] & !drq_enable[REQ_IP_BUF_BIT]; + assign irq_active[REQ_OP_BUF_BIT] = drq_active[REQ_OP_BUF_BIT] & !drq_enable[REQ_OP_BUF_BIT]; + assign irq_active[REQ_ERROR_BIT ] = control[CTRL_ERR_REQ_BIT] & aes_err; // error raised in SW + + assign irq_key128 = irq_active[REQ_KEYBUF_BIT] & irq_enable[REQ_KEYBUF_BIT]; + assign irq_ip128 = irq_active[REQ_IP_BUF_BIT] & irq_enable[REQ_IP_BUF_BIT]; + assign irq_op128 = irq_active[REQ_OP_BUF_BIT] & irq_enable[REQ_OP_BUF_BIT]; + assign irq_error = irq_active[REQ_ERROR_BIT ] & irq_enable[REQ_ERROR_BIT ]; +// merge and mask if not DRQ + assign irq_merged = irq_key128 | irq_ip128 | irq_op128 | irq_error; + + +// wire up status port + assign status[2:0] = control [2:0]; + assign status[STAT_ERROR_BIT] = (!aes_res_busy & !aes_key_rdy); + assign status[STAT_KEYOK_BIT] = aes_key_rdy; + assign status[STAT_VALID_BIT] = aes_res_rdy; + assign status[7:6] = control [7:6]; + + //---------------------------------------------------------------- + // core instantiation. 
+ //---------------------------------------------------------------- + aes_core core( + .clk(ahb_hclk), + .reset_n(ahb_hresetn), + + .encdec(aes128_encode), + .init(aes_init), + .next(aes_next), + .ready(aes_ready), + + .key({key128_be,key128_be}), + .keylen(aes256_keysize), + + .block(ip128_be), + .result(op128_be), + .result_valid(aes_valid) + ); + +endmodule + +module soclabs_iobuf_reg128 + #( + parameter WRITE_ONLY = 0, + parameter WRITE_ZPAD = 0 + ) ( +// ------------------------------------------------------- +// de-pipelined register interface +// ------------------------------------------------------- +// ahb + input wire clk, // Clock + input wire rst_b, // Reset + input wire sel_r, // Bank decode select + input wire wcyc_r, // Write cycle (wdata32 valid) + input wire rcyc_r, // Read cycle (return rdata32) + input wire [1:0] word2_r, // Address for word select + input wire [3:0] byte4_r, // Byte select decoded (up to 4 enabled) + input wire [31:0] wdata32, // Write data (byte lane qualified) + output wire [31:0] rdata32, // Read data output + output wire dma128_ack, // DMA burst acknowledge + output wire [127:0] out128_le, // Little-Endian 128-bit value + output wire [127:0] out128_be // Big-Endian 128-bit value +) ; + + reg [7:0] byte0 [0:3]; + reg [7:0] byte1 [0:3]; + reg [7:0] byte2 [0:3]; + reg [7:0] byte3 [0:3]; + reg ack128; + + wire zpad_cfg = (WRITE_ZPAD==0) ? 
1'b0 : 1'b1; + +// byte-0 array; flush on write to word-0, byte-0 +// else addressed word byte-0 write + always @(posedge clk or negedge rst_b) + if (!rst_b) + begin byte0[0] <= 8'h00; byte0[1] <= 8'h00; byte0[2] <= 8'h00; byte0[3] <= 8'h00; end + else if (zpad_cfg & sel_r & wcyc_r & byte4_r[0] & !word2_r[1] & !word2_r[0]) // Z-PAD rest + begin byte0[0] <= wdata32[ 7: 0]; byte0[1] <= 8'h00; byte0[2] <= 8'h00; byte0[3] <= 8'h00; end + else if (sel_r & wcyc_r & byte4_r[0]) + byte0[word2_r[1:0]] <= wdata32[ 7: 0]; + +// byte-1 array; flush on write to word-0, byte-0 if byte-1 not also written +// flush rest on write to word-0, byte-0 and byte-1 also written +// else address word byte-1 write + always @(posedge clk or negedge rst_b) + if (!rst_b) + begin byte1[0] <= 8'h00; byte1[1] <= 8'h00; byte1[2] <= 8'h00; byte1[3] <= 8'h00; end + else if (zpad_cfg & sel_r & wcyc_r & !byte4_r[1] & !word2_r[1] & !word2_r[0] & byte4_r[0]) // Z-PAD + begin byte1[0] <= 8'h00; byte1[1] <= 8'h00; byte1[2] <= 8'h00; byte1[3] <= 8'h00; end + else if (zpad_cfg & sel_r & wcyc_r & byte4_r[1] & !word2_r[1] & !word2_r[0] & byte4_r[0]) // Z-PAD rest + begin byte1[0] <= wdata32[15: 8]; byte1[1] <= 8'h00; byte1[2] <= 8'h00; byte1[3] <= 8'h00; end + else if (sel_r & wcyc_r & byte4_r[1]) + byte1[word2_r[1:0]] <= wdata32[15: 8]; + +// byte-2 array; flush on write to word-0, byte-0 if byte-2 not also written +// flush rest on write to word-0, byte-0 and byte-2 also written +// else address word byte-2 write + always @(posedge clk or negedge rst_b) + if (!rst_b) + begin byte2[0] <= 8'h00; byte2[1] <= 8'h00; byte2[2] <= 8'h00; byte2[3] <= 8'h00; end + else if (zpad_cfg & sel_r & wcyc_r & !byte4_r[2] & !word2_r[1] & !word2_r[0] & byte4_r[0]) // Z-PAD + begin byte2[0] <= 8'h00; byte2[1] <= 8'h00; byte2[2] <= 8'h00; byte2[3] <= 8'h00; end + else if (zpad_cfg & sel_r & wcyc_r & byte4_r[2] & !word2_r[1] & !word2_r[0] & byte4_r[0]) // Z-PAD rest + begin byte2[0] <= wdata32[23:16]; byte2[1] <= 8'h00; byte2[2] 
<= 8'h00; byte2[3] <= 8'h00; end + else if (sel_r & wcyc_r & byte4_r[2]) + byte2[word2_r[1:0]] <= wdata32[23:16]; + +// byte-3 array; flush on write to word-0, byte-0 if byte-3 not also written +// flush rest on write to word-0, byte-0 and byte-3 also written +// else address word byte-3 write + always @(posedge clk or negedge rst_b) + if (!rst_b) + begin byte3[0] <= 8'h00; byte3[1] <= 8'h00; byte3[2] <= 8'h00; byte3[3] <= 8'h00; end + else if (zpad_cfg & sel_r & wcyc_r & !byte4_r[3] & !word2_r[1] & !word2_r[0] & byte4_r[0]) // Z-PAD + begin byte3[0] <= 8'h00; byte3[1] <= 8'h00; byte3[2] <= 8'h00; byte3[3] <= 8'h00; end + else if (zpad_cfg & sel_r & wcyc_r & byte4_r[3] & !word2_r[1] & !word2_r[0] & byte4_r[0]) // Z-PAD rest + begin byte3[0] <= wdata32[31:24]; byte3[1] <= 8'h00; byte3[2] <= 8'h00; byte3[3] <= 8'h00; end + else if (sel_r & wcyc_r & byte4_r[3]) + byte3[word2_r[1:0]] <= wdata32[31:24]; + + // ack on write to final byte [15] + always @(posedge clk or negedge rst_b) + if (!rst_b) + ack128 <= 1'b0; + else + ack128 <= sel_r & wcyc_r & word2_r[1] & word2_r[0] & byte4_r[3]; + + assign dma128_ack = ack128; + +// byte reverse per word for Big Endian AES engine + assign out128_be = {byte0[0], byte1[0], byte2[0], byte3[0], + byte0[1], byte1[1], byte2[1], byte3[1], + byte0[2], byte1[2], byte2[2], byte3[2], + byte0[3], byte1[3], byte2[3], byte3[3]}; + +// natural Little Endian order (word 3 byte 3 is most significant), no byte reversal + assign out128_le = {byte3[3], byte2[3], byte1[3], byte0[3], + byte3[2], byte2[2], byte1[2], byte0[2], + byte3[1], byte2[1], byte1[1], byte0[1], + byte3[0], byte2[0], byte1[0], byte0[0]}; + +// little-endian read data (if not Write-Only) + assign rdata32 = (sel_r & rcyc_r & (WRITE_ONLY == 0)) + ? 
{byte3[word2_r[1:0]], byte2[word2_r[1:0]], + byte1[word2_r[1:0]], byte0[word2_r[1:0]]} + : 32'h00000000; + +endmodule + +// include SecWorks IP but fix up default_nettype issues that breaks elsewhere + +`include "aes_core.v" +`default_nettype wire +`include "aes_encipher_block.v" +`default_nettype wire +`include "aes_decipher_block.v" +`default_nettype wire +`include "aes_key_mem.v" +`default_nettype wire +`include "aes_sbox.v" +`default_nettype wire +`include "aes_inv_sbox.v" +`default_nettype wire diff --git a/system/testcodes/adp_v4_cmd_tests/adp_v4_cmd_tests.hex b/system/testcodes/adp_v4_cmd_tests/adp_v4_cmd_tests.hex index acc93360df2b95c8a78bbd7202099706fabb8a12..2e9dc9ae4a9c802bd1df25bb433bac9913437255 100644 --- a/system/testcodes/adp_v4_cmd_tests/adp_v4_cmd_tests.hex +++ b/system/testcodes/adp_v4_cmd_tests/adp_v4_cmd_tests.hex @@ -62,10 +62,6 @@ 02 00 00 -A1 -01 -00 -00 4F 02 00 @@ -110,6 +106,10 @@ A1 02 00 00 +A1 +01 +00 +00 4F 02 00 diff --git a/system/testcodes/aes128_tests_dma230/aes128_tests_dma230.hex b/system/testcodes/aes128_tests_dma230/aes128_tests_dma230.hex index e4dca2583571f8ac0bba3fdddbc7c3151136b42f..cea6ef55d8c10d2b88e092e4959081199df0b077 100644 --- a/system/testcodes/aes128_tests_dma230/aes128_tests_dma230.hex +++ b/system/testcodes/aes128_tests_dma230/aes128_tests_dma230.hex @@ -62,22 +62,6 @@ A0 13 00 00 -63 -0E -00 -00 -87 -0E -00 -00 -AB -0E -00 -00 -CF -0E -00 -00 83 13 00 @@ -122,6 +106,22 @@ CF 13 00 00 +63 +0E +00 +00 +87 +0E +00 +00 +AB +0E +00 +00 +CF +0E +00 +00 21 0E 00 @@ -1830,13 +1830,13 @@ FE 26 66 61 -00 +0B 20 00 F0 10 FC -00 +0B 20 00 F0 @@ -1846,13 +1846,13 @@ A5 61 E6 61 -01 +0C 20 00 F0 08 FC -01 +0C 20 00 F0 @@ -1862,13 +1862,13 @@ FB 62 66 62 -02 +0D 20 00 F0 00 FC -02 +0D 20 00 F0 @@ -1878,13 +1878,13 @@ A5 62 E6 62 -03 +0E 20 00 F0 F8 FB -03 +0E 20 00 F0 @@ -2666,7 +2666,7 @@ FD F0 69 FA -00 +0B 20 B7 E1 @@ -3554,19 +3554,19 @@ EC F0 AD F8 -01 +0C 20 00 F0 AA F8 -02 +0D 20 00 F0 A7 F8 -03 +0E 20 00 F0 diff 
--git a/system/testcodes/aes128_tests_dma230/makefile b/system/testcodes/aes128_tests_dma230/makefile index 3f41cf935611a5a40a401b3830b1a433c216f8b1..f2060d4ba17bdecf114ebfe62d8dbffe2b4c2960 100644 --- a/system/testcodes/aes128_tests_dma230/makefile +++ b/system/testcodes/aes128_tests_dma230/makefile @@ -175,6 +175,7 @@ $(TESTNAME).ELF : $(TESTNAME).o dma_pl230_driver.o $(SYSTEM_FILE).o $(STARTUP_FI $(TESTNAME).hex : $(TESTNAME).ELF fromelf --vhx --8x1 $< --output $@ + fromelf --bin $< --output $(TESTNAME).bin $(TESTNAME).lst : $(TESTNAME).ELF fromelf -c -d -e -s -z -v $< --output $@ diff --git a/system/testcodes/aes128_tests_memcpy/aes128_tests_memcpy.hex b/system/testcodes/aes128_tests_memcpy/aes128_tests_memcpy.hex index cc234191d303cae66b4b88f5584c19888b1c9934..badfbdf31cf309781d422bc4725341fd9a8b480f 100644 --- a/system/testcodes/aes128_tests_memcpy/aes128_tests_memcpy.hex +++ b/system/testcodes/aes128_tests_memcpy/aes128_tests_memcpy.hex @@ -62,22 +62,6 @@ F8 09 00 00 -1B -05 -00 -00 -3F -05 -00 -00 -63 -05 -00 -00 -87 -05 -00 -00 1F 09 00 @@ -122,6 +106,22 @@ F8 09 00 00 +1B +05 +00 +00 +3F +05 +00 +00 +63 +05 +00 +00 +87 +05 +00 +00 1F 09 00 @@ -814,13 +814,13 @@ B6 26 66 60 -00 +0B 20 00 F0 D3 FA -00 +0B 20 00 F0 @@ -830,13 +830,13 @@ A5 60 E6 60 -01 +0C 20 00 F0 CB FA -01 +0C 20 00 F0 @@ -846,13 +846,13 @@ FA 61 66 61 -02 +0D 20 00 F0 C3 FA -02 +0D 20 00 F0 @@ -862,13 +862,13 @@ A5 61 E6 61 -03 +0E 20 00 F0 BB FA -03 +0E 20 00 F0 @@ -1234,25 +1234,25 @@ C8 FE 84 19 -00 +0B 20 00 F0 F9 F9 -01 +0C 20 00 F0 F6 F9 -02 +0D 20 00 F0 F3 F9 -03 +0E 20 00 F0 diff --git a/system/testcodes/aes128_tests_memcpy/makefile b/system/testcodes/aes128_tests_memcpy/makefile index ef85d4e64d70bf3df1b5cc4963a54aa7f3debddc..ab493f7c2adafecd7f09fe677d4773c0c6d8a8a5 100644 --- a/system/testcodes/aes128_tests_memcpy/makefile +++ b/system/testcodes/aes128_tests_memcpy/makefile @@ -175,6 +175,7 @@ $(TESTNAME).ELF : $(TESTNAME).o dma_pl230_driver.o $(SYSTEM_FILE).o $(STARTUP_FI 
$(TESTNAME).hex : $(TESTNAME).ELF fromelf --vhx --8x1 $< --output $@ + fromelf --bin $< --output $(TESTNAME).bin $(TESTNAME).lst : $(TESTNAME).ELF fromelf -c -d -e -s -z -v $< --output $@ diff --git a/system/testcodes/software_list.txt b/system/testcodes/software_list.txt index c61c1804974f84fa7817d2edd10efee66bcf35e9..c8653177ca081aefa92b50b5ac1747e57ac1fae7 100644 --- a/system/testcodes/software_list.txt +++ b/system/testcodes/software_list.txt @@ -1 +1,3 @@ adp_v4_cmd_tests +aes128_tests_memcpy +aes128_tests_dma230