From 4b779cf0d2743a6be48d33524e191ec4edcebf35 Mon Sep 17 00:00:00 2001
From: Holger Hans Peter Freyther <holger@freyther.de>
Date: Wed, 25 Apr 2012 09:20:50 +0200
Subject: [PATCH] dvnixload/ubl copy as of pre 0.2.6 svn

---
 ChangeLog  |   18 +
 Makefile   |  201 +++++++++
 README     |   21 +
 TODO       |    7 +
 board.h    |   76 ++++
 common.h   |   98 +++++
 crc.c      |   78 ++++
 crc.h      |   34 ++
 davinci.c  |  528 +++++++++++++++++++++++
 davinci.h  |  463 ++++++++++++++++++++
 ddr.h      |  364 ++++++++++++++++
 dm35x.c    |  121 ++++++
 dm35x.h    |   64 +++
 dm644x.c   |  113 +++++
 dm644x.h   |   87 ++++
 gpio.c     |   91 ++++
 gpio.h     |   40 ++
 gunzip.c   |  868 ++++++++++++++++++++++++++++++++++++++
 gunzip.h   |   30 ++
 nand.c     |  975 ++++++++++++++++++++++++++++++++++++++++++
 nand.h     |   64 +++
 nandboot.c |  125 ++++++
 nor.c      | 1187 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 nor.h      |   44 ++
 norboot.c  |   72 ++++
 uart.c     |  287 +++++++++++++
 uart.h     |   48 +++
 uartboot.c |  261 ++++++++++++
 ubl.c      |  251 +++++++++++
 ubl.lds    |  103 +++++
 util.c     |   83 ++++
 util.h     |   35 ++
 32 files changed, 6837 insertions(+)
 create mode 100644 ChangeLog
 create mode 100644 Makefile
 create mode 100644 README
 create mode 100644 TODO
 create mode 100644 board.h
 create mode 100644 common.h
 create mode 100644 crc.c
 create mode 100644 crc.h
 create mode 100644 davinci.c
 create mode 100644 davinci.h
 create mode 100644 ddr.h
 create mode 100644 dm35x.c
 create mode 100644 dm35x.h
 create mode 100644 dm644x.c
 create mode 100644 dm644x.h
 create mode 100644 gpio.c
 create mode 100644 gpio.h
 create mode 100644 gunzip.c
 create mode 100644 gunzip.h
 create mode 100644 nand.c
 create mode 100644 nand.h
 create mode 100644 nandboot.c
 create mode 100644 nor.c
 create mode 100644 nor.h
 create mode 100644 norboot.c
 create mode 100644 uart.c
 create mode 100644 uart.h
 create mode 100644 uartboot.c
 create mode 100644 ubl.c
 create mode 100644 ubl.lds
 create mode 100644 util.c
 create mode 100644 util.h

diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..41e776c
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,18 @@
+v0.2.3
+	Mini-DAS: Enable DSP1 & DSP2 power.
+
+v0.2.4
+	Mini-DAS: Enable DSP1 & DSP2 power.
+	          Deactivate DSP1 & DSP2 reset lines.
+	
+
+v0.2.5
+	Mini-DAS: Enable all power supplies and deactivate
+	          all peripheral reset lines.
+
+v0.2.6
+	Mini-DAS: Add delay after applying DSP power and
+	          before releasing DSP reset lines.
+
+v0.2.7
+	Mini-DAS: Open-drain output for CAMERA RESET.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8f7379c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,201 @@
+#
+# Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ifndef CROSS_COMPILE
+CROSS_COMPILE=arm-linux-
+endif
+
+.PHONY : clean check
+
+CC=$(CROSS_COMPILE)gcc
+LD=$(CROSS_COMPILE)ld
+
+CFLAGS := -c -Os -Wall
+LDFLAGS := -Map ubl.map -nostdlib
+
+SOURCES := davinci.c uart.c uartboot.c ubl.c util.c gpio.c crc.c gunzip.c
+
+# Boards setup
+ifeq ($(BOARD),dvevm)
+# EVM for DM6446
+	PLATFORM   := DM644x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H64M16BT_3_162MHZ
+	DDR_SIZE   := 0x10000000 # 256MB
+endif
+ifeq ($(BOARD),sffsdr)
+	PLATFORM   := DM644x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H32M16BN_3_162MHZ
+	DDR_SIZE   := 0x08000000 # 128MB
+endif
+ifeq ($(BOARD),das)
+	PLATFORM   := DM644x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H64M16HR_3_162MHZ
+	DDR_SIZE   := 0x10000000 # 256MB
+endif
+ifeq ($(BOARD),minidas)
+	PLATFORM   := DM35x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H128M16HG_3IT_171MHZ
+	DDR_SIZE   := 0x10000000 # 256MB
+endif
+ifeq ($(BOARD),afeusb)
+	PLATFORM   := DM35x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H32M16BN_3_171MHZ
+	DDR_SIZE   := 0x04000000 # 64MB
+endif
+ifeq ($(BOARD),dm355evm)
+	PLATFORM   := DM35x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H64M16BT_37E_171MHZ
+	DDR_SIZE   := 0x08000000 # 128MB
+endif
+ifeq ($(BOARD),nor)
+# Only for testing NOR flash compilation
+	PLATFORM   := DM35x
+	FLASH_TYPE := FLASH_TYPE_NOR
+	DDR_TYPE   := MICRON_MT47H64M16BT_37E_171MHZ
+	DDR_SIZE   := 0x08000000 # 128MB
+endif
+
+# Generate a config.h file based on the board selected.
+# Only update this file if the selected board is different.
+OLDBOARD = $(shell cat config.h 2> /dev/null | grep "$(BOARD)")
+ifneq ($(OLDBOARD),$(BOARD))
+$(shell   echo "$(BOARD)" > config.h)
+endif
+
+CFLAGS += -D${PLATFORM} -D${FLASH_TYPE} -D$(DDR_TYPE) -Dboard_$(BOARD)
+
+# Processor type setup
+# The Instruction and Data accesses are differentiated via accessing different
+# memory map regions. The instruction region at 0x0000 and data region at
+# 0x8000 (0x10000 for DM35x) map to the same physical TCM RAM.
+ifeq ($(PLATFORM),DM644x)
+	SOURCES += dm644x.c
+	IRAM_SIZE  := 0x00004000
+	DRAM_START := 0x00008000
+	DRAM_SIZE  := 0x00004000
+endif
+ifeq ($(PLATFORM),DM35x)
+	SOURCES += dm35x.c
+	IRAM_SIZE  := 0x00008000
+	DRAM_START := 0x00010000
+	DRAM_SIZE  := 0x00008000
+endif
+
+LDFLAGS += --defsym __DDR_SIZE=$(DDR_SIZE) \
+           --defsym __IRAM_SIZE=$(IRAM_SIZE) \
+           --defsym __DRAM_START=$(DRAM_START) \
+           --defsym __DRAM_SIZE=$(DRAM_SIZE) \
+           -T ubl.lds
+
+# NAND flash setup
+ifeq ($(FLASH_TYPE),FLASH_TYPE_NAND)
+	SOURCES += nandboot.c nand.c
+endif
+ifeq ($(FLASH_TYPE),FLASH_TYPE_NOR)
+	SOURCES += norboot.c nor.c
+endif
+
+OBJECTS := $(patsubst %.c,%.o,$(SOURCES))
+EXECUTABLE := ubl.elf
+BINARY := $(EXECUTABLE)
+
+DEPS_DIR := .deps
+# Creation of the dependencies directory
+$(shell mkdir -p $(DEPS_DIR))
+
+ifneq ($(MAKECMDGOALS),clean)
+ifndef BOARD
+all:
+	@echo "You must select a board."
+	@echo "List of supported boards: dvevm sffsdr das minidas afeusb dm355evm"
+	@echo "Example:"
+	@echo "  make BOARD=sffsdr"; exit 1
+else
+ifndef PLATFORM
+all:
+	@echo "Invalid board"; exit 1
+else
+all: $(BINARY)
+endif
+endif
+endif
+
+# Including the dependency files (except during clean rules, so Make won't
+# create them only to immediately remove them again). Each one of them will
+# become a target in this Makefile (that is why the 'include' command must be
+# placed after the 'all' target). If a dependency file is not found or is out
+# of date, it is built or updated.
+# If any have actually been changed, Make restarts with a clean state and
+# reads all the dependency makefiles over again.
+ifneq ($(MAKECMDGOALS),clean)
+ifneq "$(SOURCES)" ""
+ifdef BOARD
+-include $(patsubst %.c,$(DEPS_DIR)/%.d,$(SOURCES))
+endif
+endif
+endif
+
+clean:
+	-@rm -f -v *.o $(EXECUTABLE)
+	-@rm -f -v *.map
+	-@rm -f -v *~
+	-@rm -f -v config.h
+	-@rm -f -r $(DEPS_DIR)
+
+check:
+	-@checkpatch.pl --no-tree --file *.c *.h | more
+
+$(EXECUTABLE): $(OBJECTS) ubl.lds
+	$(LD) $(LDFLAGS) $(OBJECTS) -o $@
+
+# All objects depend on the automatically generated config.h, so everything
+# is recompiled when the board changes. The linker script only affects linking.
+$(OBJECTS): config.h
+
+# The preprocessor of the compiler is used to generate a string representing
+# the dependencies of the input file. This is done invoking the compiler with
+# the -MM option (like -M but omit system header files). The purpose of the
+# sed script is to add the name of the dependency file (.d) to the string
+# returned by the preprocessor, like in the following example:
+#   "main.o: main.c main.h" would become "main.o main.d: main.c main.h"
+# The MAKE '$*' automatic variable represents the stem with which an implicit
+# rule match. This would be 'main' in the above example.
+#
+# Use of the $(SHELL) function: Double quotes must be used to surround the
+# command.
+#
+# In MAKE, using '$$' will produce a single dollar sign. When using only '$',
+# MAKE tries to expand the variable following the dollar sign. Additionally,
+# and for an obscure reason, '$1' must be preceded by a backslash on the
+# command line. This is why '\$$1' is used in the command line of the shell to
+# be seen as '$1' by the PERL script.
+#
+# The `-e' flag to the shell makes it exit immediately if the $(CC) command
+# fails (exits with a nonzero status). Normally the shell exits with the
+# status of the last command in the pipeline (sed in this case), so make would
+# not notice a nonzero status from the compiler.
+$(DEPS_DIR)/%.d: %.c
+	@echo "Generating dependencies for $<"
+	@$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $< | \
+	  sed '\''s/\($*\)\.o[ :]*/\1.o $(DEPS_DIR)\/$*.d : /g'\'' > $@; \
+	  [ -s $@ ] || rm -f $@'
diff --git a/README b/README
new file mode 100644
index 0000000..99207f6
--- /dev/null
+++ b/README
@@ -0,0 +1,21 @@
+ README for HVUBL
+
+This UBL can be used for flashing itself and a 2nd stage bootloader (usually
+U-boot) in flash memory.
+
+It can also be used to flash an arbitrary data image into flash, without
+a header.
+
+It can also be used to run DDR RAM memory testing.
+
+To compile HVUBL for the sffsdr board, for example, run:
+  $> make BOARD=sffsdr
+
+The Makefile honors the CROSS_COMPILE environment variable to specify the prefix
+of your ARM gcc toolchain. If it is not set, it defaults to:
+  CROSS_COMPILE=arm-linux-
+
+You can override it like this, for example:
+  $> make CROSS_COMPILE=arm-angstrom-linux-gnueabi- BOARD=sffsdr
+
+The output file, in ARM ELF format, will be named <ubl.elf>
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..fdba966
--- /dev/null
+++ b/TODO
@@ -0,0 +1,7 @@
+ TODO
+
+-Define DDR bus width and number of banks for each board.
+-NAND write & read page: do bound checking on
+ block number < maximum number of blocks.
+-When writing something other than UBL, use
+ Linux and U-Boot standard ECC layout.
diff --git a/board.h b/board.h
new file mode 100644
index 0000000..cc4df5b
--- /dev/null
+++ b/board.h
@@ -0,0 +1,76 @@
+/*
+ * board.h - board definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _BOARD_H_
+#define _BOARD_H_
+
+#include "common.h"
+#include "davinci.h"
+
+#if defined(board_dvevm)
+#  define PINMUX1_DEFAULT PINMUX1_UART0
+
+#elif defined(board_sffsdr)
+#  define PINMUX1_DEFAULT (PINMUX1_UART0 | PINMUX1_UART1 | PINMUX1_I2C | \
+	PINMUX1_ASP)
+
+#elif defined(board_das)
+#  define PINMUX0_DEFAULT (PINMUX0_VLYNQEN | VLYNQ_WIDTH_4)
+#  define PINMUX1_DEFAULT (PINMUX1_UART0 | PINMUX1_UART2 | PINMUX1_I2C | \
+	PINMUX1_SPI)
+
+#elif defined(board_dm355evm)
+#  define PINMUX0_DEFAULT 0x00007F55 /* All Video Inputs */
+#  define PINMUX1_DEFAULT 0x00145555 /* All Video Outputs */
+#  define PINMUX2_DEFAULT 0x00000004 /* EMIFA */
+#  define PINMUX3_DEFAULT 0x1BFF55FF /* SPI0, SPI1, UART1, I2C, SD0, SD1,
+				      * ASP0, CLKOUTs */
+#  define PINMUX4_DEFAULT 0x00000000 /* MMC/SD0 instead of MS, SPI0 */
+
+#elif defined(board_minidas)
+#  define PINMUX0_DEFAULT 0x00005C00 /* 8-bits video input, rest is GPIOs. */
+#  define PINMUX1_DEFAULT 0x00430000 /* All GPIOs (temporary: no PWM1 for buzzer) */
+#  define PINMUX2_DEFAULT 0x00000C0A /* EMIF A3:13, CE0 & CE1. */
+#  define PINMUX3_DEFAULT 0x0B7BAAC0 /* SPI0, SPI1, UART1, UART2, I2C, SD0,
+				      * CLKOUT1, CLKOUT2 */
+#  define PINMUX4_DEFAULT 0x00000001 /* MMC/SD0 + SPI0_SDI */
+
+/* GPIO assignments for this board. STATUS_LED is optional; make sure to enable
+ * the corresponding PINMUX bit for every GPIO that is used. */
+#define STATUS_LED    GPIO(71)
+#define DSP1_PWR_ENA  GPIO(95)
+#define DSP2_PWR_ENA  GPIO(94)
+#define HDD_ENA       GPIO(96)
+#define FULL_ENA      GPIO(68)
+#define ALCOHOL_ENA   GPIO(73)
+#define CAMERA_RESETn GPIO(72)
+#define FAN           GPIO(81)
+#define BUZZER        GPIO(80)
+#define WIFI_RESETn   GPIO(79)
+#define GPS_RESETn    GPIO(78)
+#define CAN_RESETn    GPIO(77)
+#define ATA_RESETn    GPIO(76)
+
+#endif
+
+#endif /* _BOARD_H_ */
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..76d52ac
--- /dev/null
+++ b/common.h
@@ -0,0 +1,98 @@
+/*
+ * common.h - common definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _COMMON_H_
+#define _COMMON_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h> /* For size_t */
+
+#include "board.h"
+
+/* Return types */
+#define E_PASS    0
+#define E_FAIL    1
+#define E_TIMEOUT 2
+
+/* Define this to have more verbose NAND debug messages */
+/* #define NAND_DEBUG 1 */
+
+/* Define this to write a RAMP into NAND for debugging. */
+/* #define NAND_DEBUG_WRITE_RAMP 1 */
+
+#define UBL_VERSION_STR "HV-UBL v0.2.11"
+
+/* Define this for bypassing the ECC check when reading from the NAND (useful
+ * for debugging/development). NOTE(review): enabled here - confirm intended. */
+#define NAND_BYPASS_READ_PAGE_ECC_CHECK 1
+
+#define MAGIC_NUMBER_MASK       0xFFFFFF00
+#define MAGIC_NUMBER_VALID      0xA1ACED00
+
+/* RBL magic numbers */
+#define RBL_MAGIC_SAFE		0xA1ACED00 /* Describes UBL flash image type for
+					    * RBL. */
+
+/* UBL magic numbers */
+#define UBL_MAGIC_BIN_IMG	0xA1ACED66 /* Describes binary flash image type
+					    * for UBL. */
+#define UBL_MAGIC_GZIP_IMG	0xA1ACED77 /* Describes gzipped binary flash
+					    * image type for UBL. */
+
+/* UBL commands */
+#define UBL_CMD_FLASH_UBL_APP	0xA1ACEDCC /* Download UBL & application via
+					    * UART and burn in flash. */
+#define UBL_CMD_FLASH_DATA	0xA1ACEDCD /* Download data via UART and
+					    * burn in flash (no header in flash). */
+#define UBL_CMD_FLASH_ERASE	0xA1ACEDCE /* Erase the whole flash. */
+#define UBL_CMD_RUN_APP		0xA1ACEDDD /* Load and run application via UART. */
+#define UBL_CMD_DDR_TEST	0xA1ACEDEE /* Test DDR2 memory. */
+
+/* Define maximum downloadable image size */
+#define MAX_IMAGE_SIZE		0xC00000 /* 12 Mbytes */
+
+struct nor_boot_t {
+	uint32_t magicNum;
+	uint32_t entryPoint;	
+	uint32_t appSize;
+	uint32_t ldAddress;	/* Starting RAM address where the image is to be copied - XIP Mode */
+};
+
+enum bootmode_t {
+	NON_SECURE_NAND = 0, /* Non-secure NAND mode */
+	NON_SECURE_NOR,      /* Non-secure NOR mode */
+	UNKNOWN_MODE,        /* Unknown mode */
+	NON_SECURE_UART      /* Non-secure UART mode */
+};
+/* Reverse the byte order of a 32-bit value; <a> is evaluated four times. */
+#define ENDIAN_SWAP(a) ((((uint32_t)(a) & 0xFFu) << 24) | (((uint32_t)(a) & 0xFF0000u) >> 8) | (((uint32_t)(a) & 0xFF00u) << 8) | (((uint32_t)(a) & 0xFF000000u) >> 24))
+
+/* Log functions */
+#define log_fail(_x_)  uart_send_str_lf(_x_)
+#define log_info(_x_)  uart_send_str_lf(_x_)
+#define log_debug(_x_) uart_send_str_lf(_x_)
+
+#define host_msg(_x_)  uart_send_str_lf(_x_)
+
+#endif /* _COMMON_H_ */
diff --git a/crc.c b/crc.c
new file mode 100644
index 0000000..58e8ce9
--- /dev/null
+++ b/crc.c
@@ -0,0 +1,78 @@
+/*
+ * crc.c -- CRC routines
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on dv-boot, original copyright follows:
+ *   Copyright (c) 2007 Sergey Kubushin <ksi@koi8.net>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <string.h> /* For size_t */
+
+#define CRC_TABLE_ELEMENTS 256
+#define DAVINCI_CRC_POLY   0x04C11DB7
+
+static uint32_t crc32_table[CRC_TABLE_ELEMENTS];
+
+/* Mirror the low <num> bits of in_val (bit 0 <-> bit num-1, and so on). */
+static uint32_t
+reflect_num(uint32_t in_val, uint32_t num)
+{
+	uint32_t pos = 0, mirrored = 0;
+
+	/* Take bits from the bottom of in_val, filling the result top-down. */
+	while (++pos < (num + 1)) {
+		mirrored |= (uint32_t)((in_val & 0x1) << (num - pos));
+		in_val >>= 1;
+	}
+	return mirrored;
+}
+
+/* Build the reflected CRC-32 lookup table; call before crc32_dv_compute(). */
+void
+crc32_dv_build_table(void)
+{
+	uint32_t i, j, crc_accum;
+
+	for (i = 0; i < CRC_TABLE_ELEMENTS; i++) {
+		crc_accum = reflect_num(i, 8) << (32 - 8);
+		for (j = 0; j < 8; j++) {
+			if ((crc_accum & 0x80000000) != 0x00000000)
+				crc_accum = (crc_accum << 1) ^ DAVINCI_CRC_POLY;
+			else
+				crc_accum = (crc_accum << 1);
+		}
+		/* Store once, after all 8 bits of the index were processed. */
+		crc32_table[i] = reflect_num(crc_accum, 32);
+	}
+}
+
+/* Compute CRC32 of <size> bytes; seed 0xFFFFFFFF, no final inversion. */
+uint32_t
+crc32_dv_compute(uint8_t *data, size_t size)
+{
+	uint32_t acc = 0xFFFFFFFF;
+	size_t n;
+
+	for (n = 0; n < size; n++)
+		acc = (acc >> 8) ^ crc32_table[(acc ^ data[n]) & 0xFF];
+	return acc;
+}
diff --git a/crc.h b/crc.h
new file mode 100644
index 0000000..ef18563
--- /dev/null
+++ b/crc.h
@@ -0,0 +1,34 @@
+/*
+ * crc.h -- CRC definitions.
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef CRC_H
+#define CRC_H 1
+
+#include <stdint.h>
+#include <string.h> /* For size_t, memcpy, memset */
+
+/* Build a reflected CRC-32 table (for standard CRC-32 algorithm) */
+void crc32_dv_build_table(void);
+
+/* Compute non-standard CRC32 */
+uint32_t
+crc32_dv_compute(uint8_t *data, size_t size);
+
+#endif /* CRC_H */
diff --git a/davinci.c b/davinci.c
new file mode 100644
index 0000000..ee8fae0
--- /dev/null
+++ b/davinci.c
@@ -0,0 +1,528 @@
+/*
+ * davinci.c - common DaVinci platform initialization
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "ddr.h"
+#include "util.h"
+#include "uart.h"
+#include "gpio.h"
+
+extern enum bootmode_t bootmode;
+extern const int8_t lpsc_en_list[];
+extern const int8_t lpsc_emurstie_list[];
+extern const size_t lpsc_en_list_len;
+extern const size_t lpsc_emurstie_list_len;
+
+/* Symbol from linker script */
+extern uint32_t __DDR_START;
+
+static void
+pinmuxControl(uint32_t regOffset, uint32_t mask, uint32_t value)
+{ /* Replace the <mask> bits of SYSTEM->PINMUX[regOffset] with <value>. */
+	SYSTEM->PINMUX[regOffset] &= ~mask;          /* clear the masked bits */
+	SYSTEM->PINMUX[regOffset] |= (mask & value); /* set them from <value> */
+}
+
+static void
+lpsc_tansition(uint8_t module, uint8_t domain, uint8_t state)
+{ /* Move PSC <module> (power <domain>) to <state>; blocks until reached. */
+	/* Wait for any outstanding transition to complete */
+	while ((PSC->PTSTAT) & (0x00000001 << domain))
+		;
+
+	/* If we are already in that state, just return */
+	if (((PSC->MDSTAT[module]) & 0x1F) == state)
+		return;
+
+	/* Perform transition: put <state> in MDCTL bits 4:0, trigger via PTCMD */
+	PSC->MDCTL[module] = ((PSC->MDCTL[module]) & (0xFFFFFFE0)) | (state);
+	PSC->PTCMD |= (0x00000001 << domain);
+
+	/* Wait for transition to complete */
+	while ((PSC->PTSTAT) & (0x00000001 << domain))
+		;
+
+	/* Wait and verify the state (spins forever if it is never reached) */
+	while (((PSC->MDSTAT[module]) & 0x1F) != state)
+		;
+}
+
+static void
+ivt_init(void)
+{ /* Fill the vector table at __IVT with 0xEAFFFFFE (ARM "b .", loop forever). */
+	volatile uint32_t *ivect;
+	extern uint32_t __IVT;
+
+	if (bootmode == NON_SECURE_NOR) {
+		ivect = &(__IVT);
+		*ivect++ = 0xEAFFFFFE;  /* Reset @ 0x00 */
+	} else
+		ivect = &(__IVT) + 4; /* NOTE(review): +4 words = +16 bytes; is +1 (0x04) meant? */
+
+	*ivect++ = 0xEAFFFFFE;  /* Undefined Address @ 0x04 */
+	*ivect++ = 0xEAFFFFFE;  /* Software Interrupt @0x08 */
+	*ivect++ = 0xEAFFFFFE;  /* Pre-Fetch Abort @ 0x0C */
+	*ivect++ = 0xEAFFFFFE;  /* Data Abort @ 0x10 */
+	*ivect++ = 0xEAFFFFFE;  /* Reserved @ 0x14 */
+	*ivect++ = 0xEAFFFFFE;  /* IRQ @ 0x18 */
+	*ivect   = 0xEAFFFFFE;	/* FIQ @ 0x1C */
+}
+
+static int
+timer0_init(void)
+{ /* Stop and clear TIMER0, then load its period; always returns E_PASS. */
+	TIMER0->TGCR  = 0x00000000; /* Reset timer */
+	TIMER0->TCR   = 0x00000000; /* Disable timer */
+	TIMER0->TIM12 = 0x00000000; /* Reset timer count to zero */
+
+	/* Set timer period: 5 * SYSTEM_CLK_HZ ticks (intended 5 s timeout) */
+	TIMER0->PRD12 = SYSTEM_CLK_HZ * 5;
+
+	return E_PASS;
+}
+
+void
+timer0_start(void)
+{ /* Restart TIMER0 from zero in one-shot mode; poll with timer0_status(). */
+	AINTC->IRQ1  |= 0x00000001; /* Clear interrupt */
+	TIMER0->TGCR  = 0x00000000; /* Reset timer */
+	TIMER0->TIM12 = 0x00000000; /* Reset timer count to zero */
+	TIMER0->TCR   = 0x00000040; /* Setup for one-shot mode */
+	TIMER0->TGCR  = 0x00000005; /* Start TIMER12 in 32-bits mode. */
+}
+
+uint32_t
+timer0_status(void)
+{ /* Return bit 0 of AINTC->IRQ1, the flag that timer0_start() clears. */
+	return AINTC->IRQ1 & 0x1;
+}
+
+static int
+uart0_init(void)
+{ /* Reset UART0, set the baud divisor, select 8N1; always returns E_PASS. */
+	UART0->PWREMU_MGNT = 0; /* Reset UART TX & RX components */
+	waitloop(100);
+
+	/* Set DLAB bit - allows setting of clock divisors */
+	UART0->LCR |= 0x80;
+
+	/*
+	 * Compute divisor value. Normally, we would simply use:
+	 *   SYSTEM_CLK_HZ / (UART_BCLK_RATIO * UART_BAUDRATE)
+	 * but we need to round that value by adding 0.5.
+	 * Rounding is especially important at high baud rates.
+	 */
+	UART0->DLL = (SYSTEM_CLK_HZ + (UART_BAUDRATE * (UART_BCLK_RATIO / 2))) /
+		(UART_BCLK_RATIO * UART_BAUDRATE);
+	UART0->DLH = 0x00; /* NOTE(review): assumes the divisor fits in DLL alone */
+
+	UART0->FCR = 0x0007; /* Clear UART TX & RX FIFOs */
+	UART0->MCR = 0x0000; /* RTS & CTS disabled,
+			      * Loopback mode disabled,
+			      * Autoflow disabled
+			      */
+
+	UART0->LCR = 0x0003; /* Clear DLAB bit
+			      * 8-bit words,
+			      * 1 STOP bit generated,
+			      * No Parity, No Stick parity,
+			      * No Break control
+			      */
+
+	/* Enable receiver, transmitter, set to run.  */
+	UART0->PWREMU_MGNT |= 0x6001;
+
+	return E_PASS;
+}
+
+static int
+pll_init(volatile struct pll_regs_t *pll, int pll_mult, int plldiv_ratio[5])
+{ /* Program <pll>: multiplier, then dividers with ratio > 0; returns E_PASS. */
+	int k;
+	volatile uint32_t *plldiv_reg[5];
+	int pll_is_powered_up = /* NOTE(review): holds PLLPWRDN; name looks inverted */
+		(pll->PLLCTL & DEVICE_PLLCTL_PLLPWRDN_MASK) >> 1;
+
+	plldiv_reg[0] = &pll->PLLDIV1;
+	plldiv_reg[1] = &pll->PLLDIV2;
+	plldiv_reg[2] = &pll->PLLDIV3;
+	plldiv_reg[3] = &pll->PLLDIV4;
+	plldiv_reg[4] = &pll->PLLDIV5;
+
+	/* Set PLL clock input to internal osc. */
+	pll->PLLCTL &= ~(DEVICE_PLLCTL_CLKMODE_MASK);
+
+	/* Set PLL to bypass, then wait for PLL to stabilize */
+	pll->PLLCTL &= ~(DEVICE_PLLCTL_PLLENSRC_MASK |
+			 DEVICE_PLLCTL_PLLEN_MASK);
+	waitloop(150);
+
+	/* Reset PLL: Warning, bit state is inverted for DM644x vs DM35x. */
+#if defined(DM644x)
+	pll->PLLCTL &= ~DEVICE_PLLCTL_PLLRST_MASK;
+#elif defined(DM35x)
+	pll->PLLCTL |= DEVICE_PLLCTL_PLLRST_MASK;
+#endif
+
+	if (pll_is_powered_up) {
+		/* Disable PLL */
+		pll->PLLCTL |= DEVICE_PLLCTL_PLLDIS_MASK;
+
+		/* Powerup PLL */
+		pll->PLLCTL &= ~(DEVICE_PLLCTL_PLLPWRDN_MASK);
+	}
+
+	/* Enable PLL */
+	pll->PLLCTL &= ~(DEVICE_PLLCTL_PLLDIS_MASK);
+
+	/* Wait for PLL to stabilize */
+	waitloop(150);
+
+	/* Load PLL multiplier. */
+	pll->PLLM = (pll_mult - 1) & 0xff;
+
+	/* Set and enable dividers as needed (ratio <= 0: register untouched). */
+	for (k = 0; k < 5; k++) {
+		if (plldiv_ratio[k] > 0)
+			*(plldiv_reg[k]) |= DEVICE_PLLDIV_EN_MASK |
+				(plldiv_ratio[k] - 1); /* NOTE(review): OR keeps old ratio bits */
+	}
+
+#if defined(DM35x)
+	/* Set the processor AIM wait state and PLL1 post-divider to 1 */
+	SYSTEM->MISC &= ~(DEVICE_MISC_PLL1POSTDIV_MASK |
+			  DEVICE_MISC_AIMWAITST_MASK);
+#endif
+
+	/* Initiate a new divider transition. */
+	pll->PLLCMD |= DEVICE_PLLCMD_GOSET_MASK;
+
+	/* Wait for completion of phase alignment. */
+	while ((pll->PLLSTAT & DEVICE_PLLSTAT_GOSTAT_MASK))
+		;
+
+	/* Wait for PLL to reset ( ~5 usec ) */
+	waitloop(5000);
+
+	/* Release PLL from reset */
+
+	/* Reset PLL: Warning, bit state is inverted for DM644x vs DM35x. */
+#if defined(DM644x)
+	pll->PLLCTL |= DEVICE_PLLCTL_PLLRST_MASK;
+#elif defined(DM35x)
+	pll->PLLCTL &= ~DEVICE_PLLCTL_PLLRST_MASK;
+#endif
+
+	/* Wait for PLL to re-lock:
+	 * DM644x: 2000P
+	 * DM35x:  8000P
+	 */
+	waitloop(8000);
+
+	/* Switch out of BYPASS mode */
+	pll->PLLCTL |= DEVICE_PLLCTL_PLLEN_MASK;
+
+	return E_PASS;
+}
+
+static int
+pll1_init(void)
+{ /* Choose the PLL1 divider ratios for the platform, then call pll_init(). */
+	int plldiv_ratio[5];
+
+#if defined(DM644x)
+	plldiv_ratio[0] =  1; /* PLLDIV1 fixed */
+	plldiv_ratio[1] =  2; /* PLLDIV2 fixed */
+	plldiv_ratio[2] =  3; /* PLLDIV3 fixed */
+	plldiv_ratio[3] = -1; /* PLLDIV4 not used */
+	plldiv_ratio[4] =  6; /* PLLDIV5 fixed */
+#elif defined(DM35x)
+	plldiv_ratio[0] =  2; /* PLLDIV1 fixed */
+	plldiv_ratio[1] =  4; /* PLLDIV2 fixed */
+
+	/* Find the smallest divider 3 ratio (feeds VPBE) reaching the target */
+	plldiv_ratio[2] = 0;
+	while ((plldiv_ratio[2] * VPBE_CLK_HZ) <
+	       (SYSTEM_CLK_HZ * (PLL1_Mult >> 3)))
+		plldiv_ratio[2]++;
+
+	/* Fail unless that ratio yields exactly VPBE_CLK_HZ */
+	if ((plldiv_ratio[2] * VPBE_CLK_HZ) !=
+	    (SYSTEM_CLK_HZ * (PLL1_Mult >> 3)))
+		return E_FAIL;
+
+	/* See the device datasheet for more info (must be 2 or 4) */
+	plldiv_ratio[3] =  4;
+	plldiv_ratio[4] = -1; /* PLLDIV5 not used */
+#endif
+
+	return pll_init(PLL1, PLL1_Mult, plldiv_ratio);
+}
+
+static int
+pll2_init(void)
+{ /* Program PLL2 with PLL2_Mult and dividers PLL2_Div1/PLL2_Div2 only. */
+	int plldiv_ratio[5];
+
+	plldiv_ratio[0] = PLL2_Div1;
+	plldiv_ratio[1] = PLL2_Div2;
+	plldiv_ratio[2] = -1; /* PLLDIV3 not used */
+	plldiv_ratio[3] = -1; /* PLLDIV4 not used */
+	plldiv_ratio[4] = -1; /* PLLDIV5 not used */
+
+	return pll_init(PLL2, PLL2_Mult, plldiv_ratio);
+}
+
+static void
+ddr_timing_setup(void)
+{ /* Load the DDR controller PHY, bank, timing and refresh registers. */
+	/* The configuration of DDRPHYCR is not dependent on the DDR2 device
+	 * specification but rather on the board layout.
+	 * Setup the read latency and clear DLLPWRDN */
+	DDR->DDRPHYCR = DDRPHYCR_DEFAULT |
+		(DDR_READ_Latency & DDRPHYCR_READLAT_MASK);
+
+	/*
+	 * Set the PR_OLD_COUNT bits in the Bus Burst Priority Register (PBBPR)
+	 * as suggested in TMS320DM6446 errata 2.1.2:
+	 *
+	 * On DM6446 Silicon Revision 2.1 and earlier, under certain conditions
+	 * low priority modules can occupy the bus and prevent high priority
+	 * modules like the VPSS from getting the required DDR2 throughput.
+	 */
+	DDR->PBBPR = DDR_PBBPR_PR_OLD_COUNT;
+
+	/* TIMUNLOCK (unlocked), CAS Latency, number of banks and page size */
+	DDR->SDBCR = SDBCR_DEFAULT |
+		SDBCR_TIMUNLOCK |
+		(DDR_NM << 14)   |
+		(DDR_CL << 9)    |
+		(DDR_IBANK << 4) |
+		(DDR_PAGESIZE << 0);
+
+	/* Program timing registers (writable while TIMUNLOCK is set above) */
+	DDR->SDTIMR = (DDR_T_RFC << 25) |
+		(DDR_T_RP << 22)  |
+		(DDR_T_RCD << 19) |
+		(DDR_T_WR << 16)  |
+		(DDR_T_RAS << 11) |
+		(DDR_T_RC << 6)   |
+		(DDR_T_RRD << 3)  |
+		(DDR_T_WTR << 0);
+
+	DDR->SDTIMR2 = (DDR_T_XSNR << 16) |
+		(DDR_T_XSRD << 8)  |
+		(DDR_T_RTP << 5)   |
+		(DDR_T_CKE << 0);
+#if defined(DM35x)
+	DDR->SDTIMR2 |= (DDR_T_RASMAX << 27)  |
+		(DDR_T_XP << 25);
+#endif
+
+	/* Clear the TIMUNLOCK bit (locked) */
+	DDR->SDBCR &= ~SDBCR_TIMUNLOCK;
+
+	/* Set the refresh rate */
+	DDR->SDRCR = DDR_RR;
+}
+
+static void
+ddr_reset(void)
+{ /* Cycle the LPSC_DDR2 module through SYNCRESET and back to ENABLE. */
+	/* Perform a soft reset to the DDR2 memory controller:
+	 * Put in SYNCRESET and enable it again. */
+	lpsc_tansition(LPSC_DDR2, PD0, PSC_SYNCRESET);
+	lpsc_tansition(LPSC_DDR2, PD0, PSC_ENABLE);
+}
+
+static int
+ddr_init(void)
+{ /* Enable and configure DDR2; E_FAIL if a write/read-back test mismatches. */
+	volatile uint32_t *ddr_start = &__DDR_START;
+	/* For reading/writing dummy value in order to apply timing settings */
+	volatile uint32_t ddr_dummy_read;
+
+	/* Enable DDR2 module. */
+	lpsc_tansition(LPSC_DDR2, PD0, PSC_ENABLE);
+
+#if defined(DM35x)
+	ddr_vtp_calibration();
+	ddr_reset();
+#endif
+
+	ddr_timing_setup();
+
+	/* Dummy read to apply timing settings (the value itself is unused) */
+	ddr_dummy_read = ddr_start[0];
+
+#if defined(DM644x)
+	ddr_reset();
+	ddr_vtp_calibration();
+#endif
+
+	/* Verify correct initialization: only the first DDR word is tested. */
+	ddr_start[0] = DDR_TEST_PATTERN;
+	if (ddr_start[0] != DDR_TEST_PATTERN) {
+		log_fail("DDR init failed");
+		return E_FAIL;
+	}
+
+	return E_PASS;
+}
+
+static void
+psc_init(void)
+{ /* Enable every module in lpsc_en_list[] through the power/sleep controller. */
+	uint32_t i;
+
+#if defined(DM35x)
+	/* Wait for any pending always-on power domain transition to finish */
+	while ((PSC->PTSTAT) & 0x00000001);
+#elif defined(DM644x)
+	/*
+	 * Workaround for TMS320DM6446 errata 1.3.22
+	 * (Revision(s) Affected: 1.3 and earlier):
+	 *   PSC: PTSTAT Register Does Not Clear After Warm/Maximum Reset.
+	 *   Clear the reserved location at address 0x01C41A20
+	 */
+	PSC_PTSTAT_WORKAROUND_REG = 0;
+
+	/* Put the C64x+ Core into reset (if it's on) */
+	PSC->MDCTL[LPSC_DSP] &= (~0x00000100);
+	PSC->PTCMD |= 0x00000002;
+	while ((PSC->PTSTAT) & (0x00000002));
+	while ((PSC->MDSTAT[LPSC_DSP]) & (0x00000100));
+#endif
+
+	/* Enable selected modules (takes effect at the PTCMD write below) */
+	for (i = 0; i < lpsc_en_list_len; i++) {
+		int8_t k = lpsc_en_list[i];
+
+		PSC->MDCTL[k] = (PSC->MDCTL[k] & 0xFFFFFFE0) | PSC_ENABLE;
+	}
+
+	/* Set EMURSTIE on selected modules */
+	for (i = 0; i < lpsc_emurstie_list_len; i++) {
+		int8_t k = lpsc_emurstie_list[i];
+
+		PSC->MDCTL[k] |= EMURSTIE_MASK;
+	}
+
+	/* Do Always-On Power Domain Transitions */
+	PSC->PTCMD |= 0x00000001;
+	while ((PSC->PTSTAT) & 0x00000001);
+
+#if defined(DM644x)
+	/* DO DSP Power Domain Transitions */
+	PSC->PTCMD |= 0x00000002;
+	while ((PSC->PTSTAT) & (0x00000002));
+#endif
+
+	/* Clear EMURSTIE on selected modules */
+	for (i = 0; i < lpsc_emurstie_list_len; i++) {
+		int8_t k = lpsc_emurstie_list[i];
+
+		PSC->MDCTL[k] &= (~EMURSTIE_MASK);
+	}
+}
+
+int
+davinci_platform_init(char *version)
+{ /* Full board bring-up; returns E_PASS or the first failing step's code. */
+	int status = E_PASS;
+
+	psc_init();
+
+	/* Disable ARM interrupts */
+	AINTC->INTCTL = 0x4;
+	AINTC->EABASE = 0x0;
+	AINTC->EINT0  = 0x0;
+	AINTC->EINT1  = 0x0;
+
+	AINTC->FIQ0 = 0xFFFFFFFF;
+	AINTC->FIQ1 = 0xFFFFFFFF;
+	AINTC->IRQ0 = 0xFFFFFFFF;
+	AINTC->IRQ1 = 0xFFFFFFFF;
+
+#ifdef PINMUX0_DEFAULT
+	pinmuxControl(0, 0xFFFFFFFF, PINMUX0_DEFAULT);
+#endif
+#ifdef PINMUX1_DEFAULT
+	pinmuxControl(1, 0xFFFFFFFF, PINMUX1_DEFAULT);
+#endif
+/* The following are only available on DM35x */
+#ifdef PINMUX2_DEFAULT
+	pinmuxControl(2, 0xFFFFFFFF, PINMUX2_DEFAULT);
+#endif
+#ifdef PINMUX3_DEFAULT
+	pinmuxControl(3, 0xFFFFFFFF, PINMUX3_DEFAULT);
+#endif
+#ifdef PINMUX4_DEFAULT
+	pinmuxControl(4, 0xFFFFFFFF, PINMUX4_DEFAULT);
+#endif
+
+	if (status == E_PASS)
+		status |= pll1_init();
+
+	if (status == E_PASS)
+		status |= uart0_init();
+
+	if (status == E_PASS)
+		status |= timer0_init();
+
+	uart_send_lf(); /* NOTE(review): runs even if the steps above failed */
+	log_info(version);
+
+	if (status == E_PASS)
+		status |= pll2_init();
+
+	if (status == E_PASS)
+		status |= ddr_init();
+
+#ifdef STATUS_LED
+	gpio_direction_out(STATUS_LED, 1);
+#endif /* STATUS_LED */
+
+#ifdef board_minidas
+	gpio_direction_out(FAN, 0);
+	gpio_direction_out(BUZZER, 0);
+
+	/* Put all peripherals in RESET state */
+	gpio_direction_out(DSP1_PWR_ENA, 0);
+	gpio_direction_out(DSP2_PWR_ENA, 0);
+	gpio_direction_out(WIFI_RESETn, 0);
+	gpio_direction_out(GPS_RESETn, 0);
+	gpio_direction_out(CAN_RESETn, 0);
+	gpio_direction_out(ATA_RESETn, 0);
+	gpio_direction_out(CAMERA_RESETn, 0);
+
+	/* Enable power for hard disk */
+	gpio_direction_out(HDD_ENA, 1);
+#endif
+
+	/* IRQ Vector Table Setup */
+	ivt_init();
+
+	return status;
+}
diff --git a/davinci.h b/davinci.h
new file mode 100644
index 0000000..2f81146
--- /dev/null
+++ b/davinci.h
@@ -0,0 +1,463 @@
+/*
+ * davinci.h - common DaVinci platform definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _DAVINCI_H_
+#define _DAVINCI_H_
+
+#include "common.h"
+
+#if defined(DM644x)
+#include "dm644x.h"
+#elif defined(DM35x)
+#include "dm35x.h"
+#endif
+
+/* -------------------------------------------------------------------------- *
+ *    System Control Module register structure - See sprue14.pdf, Chapter 10  *
+ *       for more details.                                                    *
+ * -------------------------------------------------------------------------- */ 
+struct sys_module_regs_t {
+#if defined(DM644x)
+	uint32_t PINMUX[2];         //0x00
+	uint32_t DSPBOOTADDR;       //0x08
+	uint32_t SUSPSRC;           //0x0C
+	uint32_t INTGEN;            //0x10
+#elif defined(DM35x)
+	uint32_t PINMUX[5];         //0x00
+#endif
+	uint32_t BOOTCFG;           //0x14
+	uint32_t ARM_INTMUX;        //0x18 - ONLY ON DM35x
+	uint32_t EDMA_EVTMUX;       //0x1C - ONLY ON DM35x
+	uint32_t DDR_SLEW;          //0x20 - ONLY ON DM35x
+	uint32_t CLKOUT;            //0x24 - ONLY ON DM35x
+	uint32_t DEVICE_ID;         //0x28
+	uint32_t VDAC_CONFIG;       //0x2C - ONLY ON DM35x
+	uint32_t TIMER64_CTL;       //0x30 - ONLY ON DM35x
+	uint32_t USBPHY_CTL;        //0x34
+#if defined(DM644x)
+	uint32_t CHP_SHRTSW;        //0x38
+#elif defined(DM35x)
+	uint32_t MISC;              //0x38
+#endif
+	uint32_t MSTPRI[2];         //0x3C
+	uint32_t VPSS_CLKCTL;       //0x44
+#if defined(DM644x)
+	uint32_t VDD3P3V_PWDN;      //0x48
+	uint32_t DDRVTPER;          //0x4C
+	uint32_t RSVD2[8];          //0x50 
+#elif defined(DM35x)
+	uint32_t DEEPSLEEP;         //0x48
+	uint32_t RSVD0;             //0x4C
+	uint32_t DEBOUNCE[8];       //0x50
+	uint32_t VTPIOCR;           //0x70
+#endif
+};
+
+#define SYSTEM ((volatile struct sys_module_regs_t *) 0x01C40000)
+
+/* -------------------------------------------------------------------------- *
+ *    ARM Interrupt Controller register structure - See sprue26.pdf for more  *
+ *       details.                                                             *
+ * -------------------------------------------------------------------------- */
+struct aintc_regs_t {       /* byte offsets follow from consecutive 32-bit fields */
+	uint32_t FIQ0;      /* 0x00 */
+	uint32_t FIQ1;      /* 0x04 */
+	uint32_t IRQ0;      /* 0x08 */
+	uint32_t IRQ1;      /* 0x0C */
+	uint32_t FIQENTRY;  /* 0x10 */
+	uint32_t IRQENTRY;  /* 0x14 */
+	uint32_t EINT0;     /* 0x18 */
+	uint32_t EINT1;     /* 0x1C */
+	uint32_t INTCTL;    /* 0x20 */
+	uint32_t EABASE;    /* 0x24 */
+	uint32_t RSVD0[2];  /* 0x28 */
+	uint32_t INTPRI0;   /* 0x30 */
+	uint32_t INTPRI1;   /* 0x34 */
+	uint32_t INTPRI2;   /* 0x38 */
+	uint32_t INTPRI3;   /* 0x3C */
+	uint32_t INTPRI4;   /* 0x40 */
+	uint32_t INTPRI5;   /* 0x44 */
+	uint32_t INTPRI6;   /* 0x48 */
+	uint32_t INTPRI7;   /* 0x4C */
+};
+
+#define AINTC ((volatile struct aintc_regs_t *) 0x01C48000)
+
+/* -------------------------------------------------------------------------- *
+ *    PLL Register structure - See sprue14.pdf, Chapter 6 for more details.   *
+ * -------------------------------------------------------------------------- */
+struct pll_regs_t {         /* one instance each is overlaid at PLL1 and PLL2 */
+	uint32_t PID;       /* 0x000 */
+	uint32_t RSVD0[56]; /* 0x004 */
+	uint32_t RSTYPE;    /* 0x0E4 */
+	uint32_t RSVD1[6];  /* 0x0E8 */
+	uint32_t PLLCTL;    /* 0x100 */
+	uint32_t RSVD2[3];  /* 0x104 */
+	uint32_t PLLM;      /* 0x110 */
+	uint32_t RSVD3;     /* 0x114 */
+	uint32_t PLLDIV1;   /* 0x118 */
+	uint32_t PLLDIV2;   /* 0x11C */
+	uint32_t PLLDIV3;   /* 0x120 */
+	uint32_t RSVD4;     /* 0x124 */
+	uint32_t POSTDIV;   /* 0x128 */
+	uint32_t BPDIV;     /* 0x12C */
+	uint32_t RSVD5[2];  /* 0x130 */
+	uint32_t PLLCMD;    /* 0x138 */
+	uint32_t PLLSTAT;   /* 0x13C */
+	uint32_t ALNCTL;    /* 0x140 */
+	uint32_t DCHANGE;   /* 0x144 */
+	uint32_t CKEN;      /* 0x148 */
+	uint32_t CKSTAT;    /* 0x14C */
+	uint32_t SYSTAT;    /* 0x150 */
+	uint32_t RSVD6[3];  /* 0x154 */
+	uint32_t PLLDIV4;   /* 0x160 - Only on DM35x */
+	uint32_t PLLDIV5;   /* 0x164 - Only on DM644x */
+};
+
+#define PLL1 ((volatile struct pll_regs_t *) 0x01C40800)
+#define PLL2 ((volatile struct pll_regs_t *) 0x01C40C00)
+
+#define DEVICE_PLLCTL_CLKMODE_MASK  0x00000100
+#define DEVICE_PLLCTL_PLLEN_MASK    0x00000001
+#define DEVICE_PLLCTL_PLLPWRDN_MASK 0x00000002
+#define DEVICE_PLLCTL_PLLRST_MASK   0x00000008
+#define DEVICE_PLLCTL_PLLDIS_MASK   0x00000010
+#define DEVICE_PLLCTL_PLLENSRC_MASK 0x00000020
+
+#define DEVICE_PLLCMD_GOSET_MASK    0x00000001
+#define DEVICE_PLLSTAT_GOSTAT_MASK  0x00000001
+#define DEVICE_PLLDIV_EN_MASK       0x00008000
+#define DEVICE_PLLSTAT_LOCK_MASK    0x00000002
+
+/* -------------------------------------------------------------------------- *
+ *    Power/Sleep Ctrl Register structure - See sprue14.pdf, Chapter 7        * 
+ *       for more details.                                                    *
+ * -------------------------------------------------------------------------- */
+struct psc_regs_t {
+	uint32_t PID;         // 0x000
+	uint32_t RSVD0[3];    // 0x004
+	uint32_t GBLCTL;      // 0x010 - NOT ON DM35x
+	uint32_t RSVD1;       // 0x014
+	uint32_t INTEVAL;     // 0x018
+	uint32_t RSVD2[9];    // 0x01C
+	uint32_t MERRPR0;     // 0x040
+	uint32_t MERRPR1;     // 0x044
+	uint32_t RSVD3[2];    // 0x048
+	uint32_t MERRCR0;     // 0x050
+	uint32_t MERRCR1;     // 0x054
+	uint32_t RSVD4[2];    // 0x058
+	uint32_t PERRPR;      // 0x060
+	uint32_t RSVD5;       // 0x064
+	uint32_t PERRCR;      // 0x068
+	uint32_t RSVD6;       // 0x06C
+	uint32_t EPCPR;       // 0x070
+	uint32_t RSVD7;       // 0x074
+	uint32_t EPCCR;       // 0x078
+	uint32_t RSVD8[33];   // 0x07C
+	uint32_t RAILSTAT;    // 0x100 - NOT ON DM35x
+	uint32_t RAILCTL;     // 0x104 - NOT ON DM35x
+	uint32_t RAILSEL;     // 0x108 - NOT ON DM35x
+	uint32_t RSVD9[5];    // 0x10C
+	uint32_t PTCMD;       // 0x120
+	uint32_t RSVD10;      // 0x124
+	uint32_t PTSTAT;      // 0x128
+	uint32_t RSVD11[53];  // 0x12C
+	uint32_t PDSTAT0;     // 0x200
+	uint32_t PDSTAT1;     // 0x204
+	uint32_t RSVD12[62];  // 0x208
+	uint32_t PDCTL0;      // 0x300
+	uint32_t PDCTL1;      // 0x304
+	uint32_t RSVD13[134]; // 0x308
+	uint32_t MCKOUT0;     // 0x520
+	uint32_t MCKOUT1;     // 0x524
+	uint32_t RSVD14[182]; // 0x528
+	uint32_t MDSTAT[41];  // 0x800
+	uint32_t RSVD15[87];  // 0x8A4
+	uint32_t MDCTL[41];   // 0xA00
+};
+
+#define PSC ((volatile struct psc_regs_t*) 0x01C41000)
+
+#if defined(DM644x)
+/* See TMS320DM6446 errata 1.3.22 */
+#define PSC_PTSTAT_WORKAROUND_REG (*((volatile uint32_t*) 0x01C41A20))
+#endif
+
+#define PD0                 0
+
+/* PSC constants */
+#define LPSC_VPSS_MAST      0
+#define LPSC_VPSS_SLV       1
+#define LPSC_EDMACC         2
+#define LPSC_EDMATC0        3
+#define LPSC_EDMATC1        4
+#if defined(DM644x)
+#define LPSC_EMAC           5
+#define LPSC_EMAC_MEM_CTL   6
+#define LPSC_MDIO           7
+#define LPSC_RESERVED0      8
+#elif defined(DM35x)
+#define LPSC_TIMER3         5
+#define LPSC_SPI1           6
+#define LPSC_MMC_SD1        7
+#define LPSC_ASP1           8
+#endif
+#define LPSC_USB            9
+#if defined(DM644x)
+#define LPSC_ATA            10
+#define LPSC_VLYNQ          11
+#define LPSC_HPI            12
+#elif defined(DM35x)
+#define LPSC_PWM3           10
+#define LPSC_SPI2           11
+#define LPSC_RTO            12
+#endif
+#define LPSC_DDR2           13
+#define LPSC_AEMIF	    14
+#define LPSC_MMC_SD0        15
+#if defined(DM644x)
+#define LPSC_RESERVED1      16
+#elif defined(DM35x)
+#define LPSC_MEMSTK         16
+#endif
+#define LPSC_ASP0           17
+#define LPSC_I2C            18
+#define LPSC_UART0          19
+#if defined(DM35x)
+#define LPSC_UART1          20
+#define LPSC_UART2          21
+#define LPSC_SPIO           22
+#define LPSC_PWM0           23
+#define LPSC_PWM1           24
+#define LPSC_PWM2           25
+#endif
+#define LPSC_GPIO           26
+#define LPSC_TIMER0         27
+#define LPSC_TIMER1         28
+#if defined(DM35x)
+#define LPSC_TIMER2         29
+#define LPSC_SYSMOD         30
+#endif
+#define LPSC_ARM            31
+#if defined(DM644x)
+#define LPSC_DSP            39
+#define LPSC_IMCOP          40
+#elif defined(DM35x)
+#define LPSC_VPSS_DAC       40
+#endif
+
+#define EMURSTIE_MASK       0x00000200
+
+#define PSC_ENABLE          0x3
+#define PSC_DISABLE         0x2
+#define PSC_SYNCRESET       0x1
+#define PSC_SWRSTDISABLE    0x0
+
+/* -------------------------------------------------------------------------- *
+ *    DDR2 Memory Ctrl Register structure - See sprue22b.pdf for more details.*
+ * -------------------------------------------------------------------------- */
+struct ddr_mem_ctl_regs_t { /* offsets assume DM644x or DM35x is defined */
+	uint32_t RSVD0;     /* 0x00 */
+	uint32_t SDRSTAT;   /* 0x04 */
+	uint32_t SDBCR;     /* 0x08 */
+	uint32_t SDRCR;     /* 0x0C */
+	uint32_t SDTIMR;    /* 0x10 */
+	uint32_t SDTIMR2;   /* 0x14 */
+#if defined(DM644x)
+	uint32_t RSVD1[2];  /* 0x18 */
+#elif defined(DM35x)
+	uint32_t RSVD1;     /* 0x18 */
+	uint32_t SDBCR2;    /* 0x1C */
+#endif
+	uint32_t PBBPR; /* 0x20 */
+	uint32_t RSVD2[39]; /* 0x24 */
+	uint32_t IRR;   /* 0xC0 */
+	uint32_t IMR;       /* 0xC4 */
+	uint32_t IMSR;      /* 0xC8 */
+	uint32_t IMCR;      /* 0xCC */
+	uint32_t RSVD3[5];  /* 0xD0 */
+	uint32_t DDRPHYCR;  /* 0xE4 */
+	uint32_t RSVD4[2];  /* 0xE8 */
+#if defined(DM644x)
+	uint32_t VTPIOCR; /* 0xF0 - In system control module for DM35x */
+#endif
+};
+
+#define DDR ((volatile struct ddr_mem_ctl_regs_t *) 0x20000000)
+
+#define DDR_TEST_PATTERN 0xA55AA55A
+
+#define SDBCR_TIMUNLOCK    (1 << 15)
+
+#if defined(DM644x)
+
+#define DDRVTPR (*((volatile uint32_t*) 0x01C42030))
+
+#define DDRPHYCR_DEFAULT      0x50006400 /* Default value with reserved fields */
+#define DDRPHYCR_READLAT_MASK (0x7 << 0)
+#define SDBCR_DEFAULT         0x00130000 /* Default value with reserved fields */
+
+#elif defined(DM35x)
+#define DDRPHYCR_DEFAULT      0x28006400 /* Default value with reserved fields */
+#define DDRPHYCR_READLAT_MASK (0xF << 0)
+#define SDBCR_DEFAULT         0x00170000 /* Default value with reserved fields */
+#endif
+
+/* -------------------------------------------------------------------------- *
+ *    AEMIF Register structure - See sprue20a.pdf for more details.           *
+ * -------------------------------------------------------------------------- */
+struct emif_regs_t {
+	uint32_t ERCSR;           // 0x00
+	uint32_t AWCCR;           // 0x04
+	uint32_t SDBCR;           // 0x08 - NOT ON DM35x
+	uint32_t SDRCR;           // 0x0C - NOT ON DM35x
+	uint32_t A1CR;            // 0x10
+	uint32_t A2CR;            // 0x14
+	uint32_t A3CR;            // 0x18 - NOT ON DM35x
+	uint32_t A4CR;            // 0x1C - NOT ON DM35x
+	uint32_t SDTIMR;          // 0x20 - NOT ON DM35x
+	uint32_t DDRSR;           // 0x24 - NOT ON DM35x
+	uint32_t DDRPHYCR;        // 0x28 - NOT ON DM35x
+	uint32_t DDRPHYSR;        // 0x2C - NOT ON DM35x
+	uint32_t TOTAR;           // 0x30 - NOT ON DM35x
+	uint32_t TOTACTR;         // 0x34 - NOT ON DM35x
+	uint32_t DDRPHYID_REV;    // 0x38 - NOT ON DM35x
+	uint32_t SDSRETR;         // 0x3C - NOT ON DM35x
+	uint32_t EIRR;            // 0x40
+	uint32_t EIMR;
+	uint32_t EIMSR;
+	uint32_t EIMCR;
+	uint32_t IOCTRLR;         // 0x50 - NOT ON DM35x
+	uint32_t IOSTATR;         // 0x54 - NOT ON DM35x
+	uint32_t RSVD0;
+	uint32_t ONENANDCTL;      // 0x5C - ONLY ON DM35x  
+	uint32_t NANDFCR;         // 0x60
+	uint32_t NANDFSR;         // 0x64
+	uint32_t RSVD1[2];
+	uint32_t NANDF1ECC;       // 0x70
+	uint32_t NANDF2ECC;       // 0x74
+	uint32_t NANDF3ECC;       // 0x78 - NOT ON DM35x
+	uint32_t NANDF4ECC;       // 0x7C - NOT ON DM35x
+	uint32_t RSVD2;           // 0x80
+	uint32_t IODFTECR;
+	uint32_t IODFTGCR;
+	uint32_t RSVD3;
+	uint32_t IODFTMRLR;       // 0x90
+	uint32_t IODFTMRMR;       // 0x94
+	uint32_t IODFTMRMSBR;     // 0x98
+	uint32_t RSVD4[5];
+	uint32_t MODRNR;          // 0xB0
+	uint32_t RSVD5[2];
+	uint32_t NAND4BITECCLOAD; // 0xBC - ONLY ON DM35x  
+	uint32_t NAND4BITECC1;    // 0xC0 - ONLY ON DM35x  
+	uint32_t NAND4BITECC2;    // 0xC4 - ONLY ON DM35x  
+	uint32_t NAND4BITECC3;    // 0xC8 - ONLY ON DM35x  
+	uint32_t NAND4BITECC4;    // 0xCC - ONLY ON DM35x  
+	uint32_t NANDERRADD1;     // 0xD0 - ONLY ON DM35x  
+	uint32_t NANDERRADD2;     // 0xD4 - ONLY ON DM35x  
+	uint32_t NANDERRVAL1;     // 0xD8 - ONLY ON DM35x  
+	uint32_t NANDERRVAL2;     // 0xDC - ONLY ON DM35x
+};
+
+#if defined(DM644x)
+#define AEMIF ((volatile struct emif_regs_t *) 0x01E00000)
+#elif defined(DM35x)
+#define AEMIF ((volatile struct emif_regs_t *) 0x01E10000)
+#endif
+
+/* -------------------------------------------------------------------------- *
+ *    UART Register structure - See sprue33.pdf for more details.             *
+ * -------------------------------------------------------------------------- */
+struct uart_regs_t {           /* THR and FCR are macro aliases of RBR and IIR */
+	uint32_t RBR;          /* 0x00 */
+	uint32_t IER;          /* 0x04 */
+	uint32_t IIR;          /* 0x08 */
+	uint32_t LCR;          /* 0x0C */
+	uint32_t MCR;          /* 0x10 */
+	uint32_t LSR;          /* 0x14 */
+	uint32_t MSR; /* 0x18 - NOT ON DM35x */
+	uint32_t SCR; /* 0x1C - NOT ON DM35x */
+	uint32_t DLL;          /* 0x20 */
+	uint32_t DLH;          /* 0x24 */
+	uint32_t PID1;         /* 0x28 */
+	uint32_t PID2;         /* 0x2C */
+	uint32_t PWREMU_MGNT;  /* 0x30 */
+};
+
+#define THR RBR
+#define FCR IIR
+
+#define UART0 ((volatile struct uart_regs_t *) 0x01C20000)
+
+#define UART_BCLK_RATIO	16	/* BCLK is 16 times the baudrate */
+#define UART_BAUDRATE	115200
+
+/* -------------------------------------------------------------------------- *
+ *    Timer Register structure - See sprue26.pdf for more details.             *
+ * -------------------------------------------------------------------------- */
+struct timer_regs_t {           // register map overlaid at TIMER0
+	uint32_t PID12;          // 0x00
+	uint32_t EMUMGT_CLKSPD;  // 0x04
+	uint32_t GPINT_GPEN;     // 0x08 - NOT ON DM35x
+	uint32_t GPTDAT_GPDIR;   // 0x0C - NOT ON DM35x
+	uint32_t TIM12;          // 0x10
+	uint32_t TIM34;          // 0x14
+	uint32_t PRD12;          // 0x18
+	uint32_t PRD34;          // 0x1C
+	uint32_t TCR;            // 0x20
+	uint32_t TGCR;           // 0x24
+	uint32_t WDTCR;          // 0x28
+	uint32_t RSVD1[2];       // 0x2C - two words (0x2C, 0x30) so REL12 sits at 0x34
+	uint32_t REL12;          // 0x34 - ONLY ON DM35x
+	uint32_t REL34;          // 0x38 - ONLY ON DM35x
+	uint32_t CAP12;          // 0x3C - ONLY ON DM35x
+	uint32_t CAP34;          // 0x40 - ONLY ON DM35x
+	uint32_t INTCTL_STAT;    // 0x44 - ONLY ON DM35x
+};
+
+#define TIMER0 ((volatile struct timer_regs_t *) 0x01C21400)
+
+struct gpio_controller {       /* offsets are relative to where the struct is overlaid */
+	uint32_t dir;          /* 0x00 */
+	uint32_t out_data;     /* 0x04 */
+	uint32_t set_data;     /* 0x08 */
+	uint32_t clr_data;     /* 0x0C */
+	uint32_t in_data;      /* 0x10 */
+	uint32_t set_rising;   /* 0x14 */
+	uint32_t clr_rising;   /* 0x18 */
+	uint32_t set_falling;  /* 0x1C */
+	uint32_t clr_falling;  /* 0x20 */
+	uint32_t intstat;      /* 0x24 */
+};
+
+#define DAVINCI_GPIO_BASE 0x01C67000
+
+#define GPIOC ((volatile struct gpio_controller *) DAVINCI_GPIO_BASE)
+
+int davinci_platform_init(char *version);
+void ddr_vtp_calibration(void);
+
+void timer0_start(void);
+uint32_t timer0_status(void);
+
+#endif /* _DAVINCI_H_ */
diff --git a/ddr.h b/ddr.h
new file mode 100644
index 0000000..00eae70
--- /dev/null
+++ b/ddr.h
@@ -0,0 +1,364 @@
+/*
+ * ddr.h - DDR devices parameters
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _DDDR_H_
+#define _DDDR_H_
+
+#include <stdint.h>
+
+#if defined(MICRON_MT47H32M16BN_3_171MHZ)
+/* Micron MT47H32M16BN-3 @ 171 MHz settings:
+ * TCK      = 5.85 nS -> 1 / 171MHz
+ * T_REF    = 7.8 uS (varies with commercial vs industrial)
+ * T_RFC    = 105 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 115 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 2;    /* 4 banks. */
+static const uint16_t DDR_RR    = 1336; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 17;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 9;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 1;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * Only for 8 bank DDR2/mDDR memories. When interfacing to DDR2/mDDR memories
+ * with less than 8 banks the T_RRD field should be calculated using:
+ *   (T_RRD * DDRCLK) - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 8;  /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * Should be 17 but max. value
+					  * is 15 (4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 19;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H32M16BN_3_162MHZ) /* SFFSDR */
+/* Micron MT47H32M16BN-3 @ 162 MHz settings:
+ * TCK      = 6.17 nS -> 1 / 162 MHz
+ * T_REF    = 7.8 uS (varies with commercial vs industrial)
+ * T_RFC    = 105 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 115 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 2;    /* 4 banks. */
+static const uint16_t DDR_RR    = 1265; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 16;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 8;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 1;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * Only for 8 bank DDR2/mDDR memories. When interfacing to DDR2/mDDR memories
+ * with less than 8 banks the T_RRD field should be calculated using:
+ *   (T_RRD * DDRCLK) - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 8;  /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * Should be 17 but max. value
+					  * is 15 (4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 18;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H64M16BT_37E_171MHZ) /* EVM DM355 */
+/* Micron MT47H64M16BT-37E @ 171 MHz */
+static const uint8_t  DDR_IBANK = 3;	/* 8 banks. */
+static const uint16_t DDR_RR = 1336;
+static const uint8_t  DDR_CL = 3;
+static const uint8_t  DDR_T_RFC = 21;
+static const uint8_t  DDR_T_RP = 2;
+static const uint8_t  DDR_T_RCD = 2;
+static const uint8_t  DDR_T_WR = 2;
+static const uint8_t  DDR_T_RAS = 6;
+static const uint8_t  DDR_T_RC = 9;
+static const uint8_t  DDR_T_RRD = 1;
+static const uint8_t  DDR_T_WTR = 1;
+static const uint8_t  DDR_T_RASMAX = 7;
+static const uint8_t  DDR_T_XP = 2;
+static const uint8_t  DDR_T_XSNR = 23;
+static const uint8_t  DDR_T_XSRD = 199;
+static const uint8_t  DDR_T_RTP = 3;
+static const uint8_t  DDR_T_CKE = 3;
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H64M16HR_3_162MHZ) /* DAS Commercial */
+/* Micron MT47H64M16HR-3 @ 162 MHz settings:
+ * TCK      = 6.17 nS -> 1 / 162 MHz
+ * T_REF    = 7.8 uS (varies with commercial vs industrial)
+ * T_RFC    = 127.5 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 138 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 3;   /* 8 banks. */
+static const uint16_t DDR_RR    = 1265; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 20;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 8;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 2;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * Only for 8 bank DDR2/mDDR memories. When interfacing to DDR2/mDDR memories
+ * with less than 8 banks the T_RRD field should be calculated using:
+ *   (T_RRD * DDRCLK) - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 8;  /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * Should be 17 but max. value
+					  * is 15 (4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 21;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H64M16HR_3IT_162MHZ) /* DAS industrial */
+/* Micron MT47H64M16HR-3IT @ 162 MHz settings:
+ * TCK      = 6.17 nS -> 1 / 162 MHz
+ * T_REF    = 3.9 uS (varies with commercial vs industrial)
+ * T_RFC    = 127.5 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 138 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 3;   /* 8 banks. */
+static const uint16_t DDR_RR    = 635; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 20;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 8;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 2;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * Only for 8 bank DDR2/mDDR memories. When interfacing to DDR2/mDDR memories
+ * with less than 8 banks the T_RRD field should be calculated using:
+ *   (T_RRD * DDRCLK) - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 15; /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * Should be 17 but max. value
+					  * is 15 (4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 21;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H64M16BT_3_162MHZ)
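+/* Micron MT47H64M16BT-3 @ 162 MHz settings (NOTE(review): looks copied from a 171 MHz profile -- confirm):
+ * TCK      = 5.85 nS -> 1 / 171 MHz (DDR_RR = 667 below is also 171 MHz * T_REF, not 162 MHz)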
+ * T_REF    = 3.9 uS (varies with commercial vs industrial)
+ * T_RFC    = 198 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 208 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 3;   /* 8 banks. */
+static const uint16_t DDR_RR    = 667; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 33;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 9;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 1;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * Only for 8 bank DDR2/mDDR memories. When interfacing to DDR2/mDDR memories
+ * with less than 8 banks the T_RRD field should be calculated using:
+ *   (T_RRD * DDRCLK) - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 15; /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * Should be 17 but max. value
+					  * is 15 (4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 34;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H128M16HG_3IT_171MHZ)
+/* Micron MT47H128M16HG-3IT @ 171 MHz settings:
+ * TCK      = 5.85 nS -> 1 / 171MHz
+ * T_REF    = 3.9 uS (varies with commercial vs industrial)
+ * T_RFC    = 198 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 208 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 3;   /* 8 banks. */
+static const uint16_t DDR_RR    = 667; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 33;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 9;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 1;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * Only for 8 bank DDR2/mDDR memories. When interfacing to DDR2/mDDR memories
+ * with less than 8 banks the T_RRD field should be calculated using:
+ *   (T_RRD * DDRCLK) - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 15; /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * Should be 17 but max. value
+					  * is 15 (4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 34;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#endif
+
+#endif /* _DDDR_H_ */
diff --git a/dm35x.c b/dm35x.c
new file mode 100644
index 0000000..aa5fea7
--- /dev/null
+++ b/dm35x.c
@@ -0,0 +1,121 @@
+/*
+ * dm35x.c - DM35x specific platform initialization
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "davinci.h"
+#include "util.h"
+
+#define DEVICE_VTPIOCR_PWRDN_MASK       (0x00000040)
+#define DEVICE_VTPIOCR_LOCK_MASK        (0x00000080)
+#define DEVICE_VTPIOCR_PWRSAVE_MASK     (0x00000100)
+#define DEVICE_VTPIOCR_CLR_MASK         (0x00002000)
+#define DEVICE_VTPIOCR_VTPIOREADY_MASK  (0x00004000)
+#define DEVICE_VTPIOCR_READY_MASK       (0x00008000)
+
+/* List of modules to enable in the PSC */
+const int8_t lpsc_en_list[] = {
+	LPSC_VPSS_MAST,
+	LPSC_VPSS_SLV,
+	LPSC_EDMACC,
+	LPSC_EDMATC0,
+	LPSC_EDMATC1,
+	LPSC_TIMER3,
+	LPSC_SPI1,
+	LPSC_MMC_SD1,
+	LPSC_ASP1,
+	LPSC_USB,
+	LPSC_PWM3,
+	LPSC_SPI2,
+	LPSC_RTO,
+	LPSC_DDR2,
+	LPSC_AEMIF,
+	LPSC_MMC_SD0,
+	LPSC_MEMSTK,
+	LPSC_ASP0,
+	LPSC_I2C,
+	LPSC_UART0,
+	LPSC_UART1,
+	LPSC_UART2,
+	LPSC_SPIO,
+	LPSC_PWM0,
+	LPSC_PWM1,
+	LPSC_PWM2,
+	LPSC_GPIO,
+	LPSC_TIMER0,
+	LPSC_TIMER1,
+	LPSC_TIMER2,
+	LPSC_SYSMOD,
+	LPSC_ARM,
+	LPSC_VPSS_DAC,
+};
+
+const size_t lpsc_en_list_len = sizeof(lpsc_en_list) /
+	sizeof(lpsc_en_list[0]);
+
+/* List of modules for which to control EMURSTIE */
+const int8_t lpsc_emurstie_list[] = {
+	LPSC_VPSS_MAST,
+	LPSC_VPSS_SLV,
+	LPSC_TIMER3,
+	LPSC_SPI1,
+	LPSC_USB,
+	LPSC_PWM3,
+	LPSC_DDR2,
+	LPSC_AEMIF,
+	LPSC_MMC_SD0,
+	LPSC_ASP0,
+	LPSC_GPIO,
+	LPSC_VPSS_DAC,
+};
+
+const size_t lpsc_emurstie_list_len = sizeof(lpsc_emurstie_list) /
+	sizeof(lpsc_emurstie_list[0]);
+
+/* DDR2 VTP calibration, driven entirely through SYSTEM->VTPIOCR */
+void
+ddr_vtp_calibration(void)
+{
+	const uint32_t restart_bits = DEVICE_VTPIOCR_CLR_MASK |
+				      DEVICE_VTPIOCR_LOCK_MASK |
+				      DEVICE_VTPIOCR_PWRDN_MASK;
+	const uint32_t hold_bits = DEVICE_VTPIOCR_LOCK_MASK |
+				   DEVICE_VTPIOCR_PWRSAVE_MASK;
+
+	/* Begin with CLR, LOCK and PWRDN all cleared */
+	SYSTEM->VTPIOCR = SYSTEM->VTPIOCR & ~restart_bits;
+
+	/* Raise CLR again */
+	SYSTEM->VTPIOCR = SYSTEM->VTPIOCR | DEVICE_VTPIOCR_CLR_MASK;
+
+	/* Spin until the READY bit reads back as set */
+	while ((SYSTEM->VTPIOCR & DEVICE_VTPIOCR_READY_MASK) == 0)
+		continue;
+
+	/* Set VTP_IO_READY, then PWRSAVE together with LOCK */
+	SYSTEM->VTPIOCR = SYSTEM->VTPIOCR | DEVICE_VTPIOCR_VTPIOREADY_MASK;
+	SYSTEM->VTPIOCR = SYSTEM->VTPIOCR | hold_bits;
+
+	/* Impedance is locked now, so the VTP can be powered down */
+	SYSTEM->VTPIOCR = SYSTEM->VTPIOCR | DEVICE_VTPIOCR_PWRDN_MASK;
+
+	waitloop(150);	/* wait for calibration to complete */
+}
diff --git a/dm35x.h b/dm35x.h
new file mode 100644
index 0000000..c214ed0
--- /dev/null
+++ b/dm35x.h
@@ -0,0 +1,64 @@
+/*
+ * dm35x.h - DM35x specific platform definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _DM35X_H_
+#define _DM35X_H_
+
+#include "common.h"
+
+#define SYSTEM_CLK_HZ 24000000
+
+#define VPBE_CLK_HZ   27000000
+
+#define PLL1_Mult 144 /* 216 MHz */
+
+#define PLL2_Mult 114
+#define PLL2_Div1   2 /* Fixed */
+#define PLL2_Div2  -1 /* PLLDIV2 not used */
+
+#define DEVICE_MISC_PLL1POSTDIV_MASK	0x00000002
+#define DEVICE_MISC_AIMWAITST_MASK	0x00000001
+#define DEVICE_MISC_TIMER2WDT_MASK	0x00000010
+
+#define UBL_IMAGE_SIZE 0x7800 /* 30 kB UBL (2Kb reserved for RBL stack) */
+
+/* Global Memory Timing and PLL Settings */
+static const uint8_t  DDR_NM = 1;	/* 16-bit bus width only on DM35x. */
+static const uint8_t  DDR_PAGESIZE = 2;	/* 1024-word page size. */
+
+#define DDR_PBBPR_PR_OLD_COUNT 0x000000FE /* PBBPR PR_OLD_COUNT value */
+
+/* PINMUX2 register bit values */
+#define PINMUX2_EM_CLK     (1 << 11)
+#define PINMUX2_EM_AVD     (1 << 10)
+#define PINMUX2_EM_WAIT    (1 <<  9)
+#define PINMUX2_EM_WE_OE   (1 <<  8)
+#define PINMUX2_EM_CE1     (1 <<  7)
+#define PINMUX2_EM_CE0     (1 <<  6)
+#define PINMUX2_EM_D7_0    (1 <<  5)
+#define PINMUX2_EM_D15_8   (1 <<  4)
+#define PINMUX2_EM_BA0     (1 <<  2)
+#define PINMUX2_EM_A0_BA1  (1 <<  1)
+#define PINMUX2_EM_A13_3   (1 <<  0)
+
+#endif /* _DM35X_H_ */
diff --git a/dm644x.c b/dm644x.c
new file mode 100644
index 0000000..2d55f62
--- /dev/null
+++ b/dm644x.c
@@ -0,0 +1,113 @@
+/*
+ * dm644x.c - DM644x specific platform initialization
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "davinci.h"
+#include "util.h"
+
+#define VTPIOCR_EN_MASK       (1 << 13)
+#define VTPIOCR_RECAL_MASK    (1 << 15)
+
+#define DDRVTPER_EN_MASK      (1 << 0)
+
+#define DDRVTPR_CAL_DATA_MASK 0x03FF
+
+/* List of modules (LPSC_* identifiers, stored as int8_t) to enable in the PSC */
+const int8_t lpsc_en_list[] = {
+	LPSC_VPSS_MAST,
+	LPSC_VPSS_SLV,
+	LPSC_EDMACC,
+	LPSC_EDMATC0,
+	LPSC_EDMATC1,
+	LPSC_EMAC,
+	LPSC_EMAC_MEM_CTL,
+	LPSC_MDIO,
+	LPSC_USB,
+	LPSC_ATA,
+	LPSC_VLYNQ,
+	LPSC_HPI,
+	LPSC_DDR2,
+	LPSC_AEMIF,
+	LPSC_MMC_SD0,
+	LPSC_ASP0,
+	LPSC_I2C,
+	LPSC_UART0,
+	LPSC_GPIO,
+	LPSC_TIMER0,
+	LPSC_ARM,
+	LPSC_IMCOP,
+};
+
+const size_t lpsc_en_list_len = sizeof(lpsc_en_list) /
+	sizeof(lpsc_en_list[0]); /* number of entries in lpsc_en_list */
+
+/* List of modules (LPSC_* identifiers) for which to control EMURSTIE */
+const int8_t lpsc_emurstie_list[] = {
+	LPSC_VPSS_SLV,
+	LPSC_EMAC,
+	LPSC_EMAC_MEM_CTL,
+	LPSC_MDIO,
+	LPSC_USB,
+	LPSC_ATA,
+	LPSC_VLYNQ,
+	LPSC_HPI,
+	LPSC_DDR2,
+	LPSC_AEMIF,
+	LPSC_MMC_SD0,
+	LPSC_ASP0,
+	LPSC_GPIO,
+	LPSC_IMCOP,
+};
+
+const size_t lpsc_emurstie_list_len = sizeof(lpsc_emurstie_list) /
+	sizeof(lpsc_emurstie_list[0]); /* number of entries in the list */
+
+/* DDR2 VTP calibration: recalibrate, then copy DDRVTPR data into VTPIOCR */
+void
+ddr_vtp_calibration(void)
+{
+	int32_t cal_data;
+
+	/* Enable VTP IO calibration bit (not started) */
+	DDR->VTPIOCR = 0x0000001F | VTPIOCR_EN_MASK;
+
+	/* Start VTP IO calibration */
+	DDR->VTPIOCR |= VTPIOCR_RECAL_MASK;
+
+	/* Fixed delay while calibration runs (no status bit is polled) */
+	waitloop(11*33);
+
+	/* Enable access to DDRVTPR */
+	SYSTEM->DDRVTPER = DDRVTPER_EN_MASK;
+
+	cal_data = DDRVTPR & DDRVTPR_CAL_DATA_MASK; /* Read the low 10 bits */
+
+	/* Write calibration data to VTP Control register (same 10-bit field) */
+	DDR->VTPIOCR &= ~DDRVTPR_CAL_DATA_MASK;
+	DDR->VTPIOCR |= cal_data;
+
+	/* Disable VTP IO calibration bit */
+	DDR->VTPIOCR &= ~VTPIOCR_EN_MASK;
+
+	/* Disable access to DDRVTPR */
+	SYSTEM->DDRVTPER = 0;
+}
diff --git a/dm644x.h b/dm644x.h
new file mode 100644
index 0000000..511d202
--- /dev/null
+++ b/dm644x.h
@@ -0,0 +1,87 @@
+/*
+ * dm644x.h - DM644x specific platform definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _DM644X_H_
+#define _DM644X_H_
+
+#include "common.h"
+
+#define SYSTEM_CLK_HZ 27000000
+
+#define PLL1_Mult 22 /* DSP=594 MHz ARM=297 MHz */
+
+#define PLL2_Mult 24 /* DDRPHY=324 MHz DDRCLK=162 MHz */
+#define PLL2_Div1 12
+#define PLL2_Div2  2
+
+#define UBL_IMAGE_SIZE 0x3800 /* 14 kB UBL (2Kb reserved for RBL stack) */
+
+/* Global Memory Timing and PLL Settings */
+static const uint8_t  DDR_NM = 0;	/* 32-bit bus width by default. */
+static const uint8_t  DDR_PAGESIZE = 2;	/* 1024-word page size. */
+
+/*
+ * See TMS320DM6446 errata 2.1.2:
+ *   A value of $20 should provide a good ARM (cache enabled)
+ *   performance and still allow good utilization by the VPSS or other
+ *   modules.
+ */
+#define DDR_PBBPR_PR_OLD_COUNT 0x00000020 /* value from the errata note above */
+
+/* PINMUX0 register bit values */
+#define PINMUX0_EMACEN   (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define PINMUX0_HPIEN    (1 << 29)
+#define PINMUX0_CFLDEN   (1 << 27)
+#define PINMUX0_CWE      (1 << 26)
+#define PINMUX0_LFLDEN   (1 << 25)
+#define PINMUX0_LOEEN    (1 << 24)
+#define PINMUX0_RGB888   (1 << 23)
+#define PINMUX0_RGB666   (1 << 22)
+#define PINMUX0_ATAEN    (1 << 17)
+#define PINMUX0_HDIREN   (1 << 16)
+#define PINMUX0_VLYNQEN  (1 << 15)
+#define PINMUX0_VLSCREN  (1 << 14)
+#define PINMUX0_VLYNQWD1 (1 << 13)
+#define PINMUX0_VLYNQWD0 (1 << 12)
+
+#define VLYNQ_WIDTH_1 (0 << 12)
+#define VLYNQ_WIDTH_2 (1 << 12)
+#define VLYNQ_WIDTH_3 (3 << 12) /* See TI SPRUE26A document. */
+#define VLYNQ_WIDTH_4 (2 << 12) /* See TI SPRUE26A document. */
+
+/* PINMUX1 register bit values */
+#define PINMUX1_TIMIN (1 << 18)
+#define PINMUX1_CLK1  (1 << 17)
+#define PINMUX1_CLK0  (1 << 16)
+#define PINMUX1_ASP   (1 << 10)
+#define PINMUX1_SPI   (1 <<  8)
+#define PINMUX1_I2C   (1 <<  7)
+#define PINMUX1_PWM2  (1 <<  6)
+#define PINMUX1_PWM1  (1 <<  5)
+#define PINMUX1_PWM0  (1 <<  4)
+#define PINMUX1_U2FLO (1 <<  3)
+#define PINMUX1_UART2 (1 <<  2)
+#define PINMUX1_UART1 (1 <<  1)
+#define PINMUX1_UART0 (1 <<  0)
+
+#endif /* _DM644X_H_ */
diff --git a/gpio.c b/gpio.c
new file mode 100644
index 0000000..e09f97a
--- /dev/null
+++ b/gpio.c
@@ -0,0 +1,91 @@
+/*
+ * gpio.c - GPIO handling
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on davinci gpio code from the Linux kernel, original copyright follows:
+ *   Copyright (c) 2006-2007 David Brownell
+ *   Copyright (c) 2007, MontaVista Software, Inc. <source@mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "gpio.h"
+
+static struct gpio_controller *
+gpio_to_controller(unsigned gpio)
+{
+	/* Each group of 32 GPIOs has its own register block; select it by */
+	/* group index.  GPIO numbers 128 and above have no block: NULL. */
+	switch (gpio / 32) {
+	case 0:
+		return (void *) DAVINCI_GPIO_BASE + 0x10;
+	case 1:
+		return (void *) DAVINCI_GPIO_BASE + 0x38;
+	case 2:
+		return (void *) DAVINCI_GPIO_BASE + 0x60;
+	case 3:
+		return (void *) DAVINCI_GPIO_BASE + 0x88;
+	default:
+		return NULL;
+	}
+}
+
+static inline uint32_t
+gpio_mask(unsigned gpio)
+{
+	return (uint32_t)1 << (gpio % 32); /* unsigned shift: bit 31 is valid */
+}
+
+int
+gpio_direction_in(unsigned gpio)
+{
+	volatile struct gpio_controller *g = gpio_to_controller(gpio);
+
+	if (!g)
+		return -1; /* no controller for this gpio (lookup gave NULL) */
+	g->dir |= gpio_mask(gpio); /* set this pin's bit in the dir register */
+	return 0;
+}
+
+int
+gpio_direction_out(unsigned gpio, int initial_value)
+{
+	volatile struct gpio_controller *g = gpio_to_controller(gpio);
+	uint32_t mask = gpio_mask(gpio);
+
+	if (!g)
+		return -1; /* no controller for this gpio (lookup gave NULL) */
+	if (initial_value)
+		g->set_data = mask;
+	else
+		g->clr_data = mask;
+	g->dir &= ~mask; /* level is written first, then the dir bit is cleared */
+	return 0;
+}
+
+void
+gpio_set(unsigned gpio, int state)
+{
+	volatile struct gpio_controller *g = gpio_to_controller(gpio);
+	uint32_t mask = gpio_mask(gpio);
+
+	if (g && state)
+		g->set_data = mask;
+	else if (g) /* g is NULL for an out-of-range gpio: write nothing */
+		g->clr_data = mask;
+}
diff --git a/gpio.h b/gpio.h
new file mode 100644
index 0000000..ca941e3
--- /dev/null
+++ b/gpio.h
@@ -0,0 +1,40 @@
+/*
+ * gpio.h - Gpio specific platform definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _GPIO_H_
+#define _GPIO_H_
+
+#include "common.h"
+
+#define GPIO(X) (X) /* 0 <= X <= (DAVINCI_N_GPIO - 1) */
+
+int
+gpio_direction_in(unsigned gpio); /* sets the pin's dir bit; 0 on success */
+
+int
+gpio_direction_out(unsigned gpio, int initial_value); /* 0 on success */
+
+void
+gpio_set(unsigned gpio, int state); /* state != 0: set_data, else clr_data */
+
+#endif /* _GPIO_H_ */
diff --git a/gunzip.c b/gunzip.c
new file mode 100644
index 0000000..b383ca7
--- /dev/null
+++ b/gunzip.c
@@ -0,0 +1,868 @@
+/* gunzip.c - puff implementation
+ *
+ * Copyright (C) 2002-2004 Mark Adler
+ * For conditions of distribution and use, see copyright notice in puff.h
+ * version 1.8, 9 Jan 2004
+ *
+ * puff.c is a simple inflate written to be an unambiguous way to specify the
+ * deflate format.  It is not written for speed but rather simplicity.  As a
+ * side benefit, this code might actually be useful when small code is more
+ * important than speed, such as bootstrap applications.  For typical deflate
+ * data, zlib's inflate() is about four times as fast as puff().  zlib's
+ * inflate compiles to around 20K on my machine, whereas puff.c compiles to
+ * around 4K on my machine (a PowerPC using GNU cc).  If the faster decode()
+ * function here is used, then puff() is only twice as slow as zlib's
+ * inflate().
+ *
+ * All dynamically allocated memory comes from the stack.  The stack required
+ * is less than 2K bytes.  This code is compatible with 16-bit int's and
+ * assumes that long's are at least 32 bits.  puff.c uses the short data type,
+ * assumed to be 16 bits, for arrays in order to conserve memory.  The code
+ * works whether integers are stored big endian or little endian.
+ *
+ * In the comments below are "Format notes" that describe the inflate process
+ * and document some of the less obvious aspects of the format.  This source
+ * code is meant to supplement RFC 1951, which formally describes the deflate
+ * format:
+ *
+ *    http://www.zlib.org/rfc-deflate.html
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0  10 Feb 2002     - First version
+ * 1.1  17 Feb 2002     - Clarifications of some comments and notes
+ *                      - Update puff() dest and source pointers on negative
+ *                        errors to facilitate debugging deflators
+ *                      - Remove longest from struct huffman -- not needed
+ *                      - Simplify offs[] index in construct()
+ *                      - Add input size and checking, using longjmp() to
+ *                        maintain easy readability
+ *                      - Use short data type for large arrays
+ *                      - Use pointers instead of long to specify source and
+ *                        destination sizes to avoid arbitrary 4 GB limits
+ * 1.2  17 Mar 2002     - Add faster version of decode(), doubles speed (!),
+ *                        but leave simple version for readability
+ *                      - Make sure invalid distances detected if pointers
+ *                        are 16 bits
+ *                      - Fix fixed codes table error
+ *                      - Provide a scanning mode for determining size of
+ *                        uncompressed data
+ * 1.3  20 Mar 2002     - Go back to lengths for puff() parameters [Jean-loup]
+ *                      - Add a puff.h file for the interface
+ *                      - Add braces in puff() for else do [Jean-loup]
+ *                      - Use indexes instead of pointers for readability
+ * 1.4  31 Mar 2002     - Simplify construct() code set check
+ *                      - Fix some comments
+ *                      - Add FIXLCODES #define
+ * 1.5   6 Apr 2002     - Minor comment fixes
+ * 1.6   7 Aug 2002     - Minor format changes
+ * 1.7   3 Mar 2003     - Added test code for distribution
+ *                      - Added zlib-like license
+ * 1.8   9 Jan 2004     - Added some comments on no distance codes case
+ * 1.9  20 Feb 2009     - Hugo Villeneuve: changed puff function name to gunzip
+ */
+
+#include "uart.h"
+
+#define NIL ((unsigned char *)0)        /* for no output option */
+
+/*
+ * Maximums for allocations and loops.  It is not useful to change these --
+ * they are fixed by the deflate format.
+ */
+#define MAXBITS 15              /* maximum bits in a code */
+#define MAXLCODES 286           /* maximum number of literal/length codes */
+#define MAXDCODES 30            /* maximum number of distance codes */
+#define MAXCODES (MAXLCODES+MAXDCODES)  /* maximum codes lengths to read */
+#define FIXLCODES 288           /* number of fixed literal/length codes */
+
+/* input and output state shared by the decoding routines */
+struct state {
+	/* output state */
+	unsigned char *out;         /* output buffer, or NIL to only count */
+	unsigned long outlen;       /* available space at out */
+	unsigned long outcnt;       /* bytes written to out so far */
+
+	/* input state */
+	unsigned char *in;          /* input buffer */
+	unsigned long inlen;        /* available input at in */
+	unsigned long incnt;        /* bytes read so far */
+	int bitbuf;                 /* bits fetched from in, not yet consumed */
+	int bitcnt;                 /* number of bits in bit buffer */
+};
+
+static int pufferror; /* set to -1 when bits()/decode() run out of input */
+
+/*
+ * Moved those variables from internal stack to DDR.
+ * They were using too much internal stack and corrupted data.
+ */
+short distcnt[MAXBITS+1] __attribute__((section(".ddrram"))); /* distcode memory */
+short distsym[MAXDCODES] __attribute__((section(".ddrram"))); /* distcode memory */
+short lencnt[MAXBITS+1] __attribute__((section(".ddrram"))); /* lencode memory */
+short dyn_lengths[MAXCODES] __attribute__((section(".ddrram"))); /* descriptor code lengths */
+short lensym[FIXLCODES] __attribute__((section(".ddrram")));
+
+/*
+ * Return need bits from the input stream.  This always leaves less than
+ * eight bits in the buffer.  bits() works properly for need == 0.
+ *
+ * Format notes:
+ *
+ * - Bits are stored in bytes from the least significant bit to the most
+ *   significant bit.  Therefore bits are dropped from the bottom of the bit
+ *   buffer, using shift right, and new bytes are appended to the top of the
+ *   bit buffer, using shift left.
+ */
+static int
+bits(struct state *s, int need)
+{
+	long val;           /* bit accumulator (can use up to 20 bits) */
+
+	/* start from the bits left over from the previous call */
+	val = s->bitbuf;
+
+	while (s->bitcnt < need) {
+		if (s->incnt == s->inlen) {
+			pufferror = -1; /* flag the error for the caller */
+			return 0; /* out of input; 0 is only a placeholder */
+		}
+
+		/* load eight bits above the ones already held */
+		val |= (long)(s->in[s->incnt++]) << s->bitcnt;
+		s->bitcnt += 8;
+	}
+
+	/* drop need bits and update buffer, always zero to seven bits left */
+	s->bitbuf = (int)(val >> need);
+
+	s->bitcnt -= need;
+
+	/* return need bits, zeroing the bits above that */
+	return (int)(val & ((1L << need) - 1));
+}
+
+/*
+ * Process a stored block.
+ *
+ * Format notes:
+ *
+ * - After the two-bit stored block type (00), the stored block length and
+ *   stored bytes are byte-aligned for fast copying.  Therefore any leftover
+ *   bits in the byte that has the last bit of the type, as many as seven, are
+ *   discarded.  The value of the discarded bits are not defined and should not
+ *   be checked against any expectation.
+ *
+ * - The second inverted copy of the stored block length does not have to be
+ *   checked, but it's probably a good idea to do so anyway.
+ *
+ * - A stored block can have zero length.  This is sometimes used to byte-align
+ *   subsets of the compressed data for random access or partial recovery.
+ */
+static int
+stored(struct state *s)
+{
+	unsigned len;       /* length of stored block (16 bits, low byte first) */
+
+	/* discard leftover bits from current byte (assumes s->bitcnt < 8) */
+	s->bitbuf = 0;
+	s->bitcnt = 0;
+
+	/* get length and check against its one's complement */
+	if (s->incnt + 4 > s->inlen)
+		return 2; /* not enough input for the 4 length bytes */
+	len = s->in[s->incnt++];
+	len |= s->in[s->incnt++] << 8;
+	if (s->in[s->incnt++] != (~len & 0xff) ||
+	    s->in[s->incnt++] != ((~len >> 8) & 0xff))
+		return -2; /* didn't match complement! */
+
+	/* copy len bytes from in to out (only counted when out is NIL) */
+	if (s->incnt + len > s->inlen)
+		return 2; /* not enough input */
+	if (s->out != NIL) {
+		if (s->outcnt + len > s->outlen)
+			return 1; /* not enough output space */
+		while (len--)
+			s->out[s->outcnt++] = s->in[s->incnt++];
+	} else { /* just scanning */
+		s->outcnt += len;
+		s->incnt += len;
+	}
+
+	/* done with a valid stored block */
+	return 0;
+}
+
+/*
+ * Huffman code decoding tables.  count[1..MAXBITS] is the number of symbols of
+ * each length, which for a canonical code are stepped through in order.
+ * symbol[] are the symbol values in canonical order, where the number of
+ * entries is the sum of the counts in count[].  The decoding process can be
+ * seen in the function decode() below.
+ */
+struct huffman {
+	short *count;       /* number of symbols of each length */
+	short *symbol;      /* canonically ordered symbols */
+};
+
+/*
+ * Decode a code from the stream s using huffman table h.  Return the symbol or
+ * a negative value if there is an error.  If all of the lengths are zero, i.e.
+ * an empty code, or if the code is incomplete and an invalid code is received,
+ * then -9 is returned after reading MAXBITS bits.
+ *
+ * Format notes:
+ *
+ * - The codes as stored in the compressed data are bit-reversed relative to
+ *   a simple integer ordering of codes of the same lengths.  Hence below the
+ *   bits are pulled from the compressed data one at a time and used to
+ *   build the code value reversed from what is in the stream in order to
+ *   permit simple integer comparisons for decoding.  A table-based decoding
+ *   scheme (as used in zlib) does not need to do this reversal.
+ *
+ * - The first code for the shortest length is all zeros.  Subsequent codes of
+ *   the same length are simply integer increments of the previous code.  When
+ *   moving up a length, a zero bit is appended to the code.  For a complete
+ *   code, the last code of the longest length will be all ones.
+ *
+ * - Incomplete codes are handled by this decoder, since they are permitted
+ *   in the deflate format.  See the format notes for fixed() and dynamic().
+ */
+#ifdef SLOW
+static int
+decode(struct state *s, struct huffman *h)
+{
+	int len;   /* current number of bits in code */
+	int code;  /* len bits being decoded */
+	int first; /* first code of length len */
+	int count; /* number of codes of length len */
+	int index; /* index of first code of length len in symbol table */
+
+	code = first = index = 0;
+	for (len = 1; len <= MAXBITS; len++) {
+		code |= bits(s, 1); /* get next bit */
+		if (pufferror)
+			return -1; /* bits() ran out of input */
+
+		count = h->count[len];
+		if (code < first + count) /* if length len, return symbol */
+			return h->symbol[index + (code - first)];
+		index += count; /* else update for next length */
+		first += count;
+		first <<= 1;
+		code <<= 1;
+	}
+	return -9; /* ran out of codes */
+}
+
+/*
+ * A faster version of decode() for real applications of this code.   It's not
+ * as readable, but it makes puff() twice as fast.  And it only makes the code
+ * a few percent larger.
+ */
+#else /* !SLOW */
+static int
+decode(struct state *s, struct huffman *h)
+{
+	int len;     /* current number of bits in code */
+	int code;    /* len bits being decoded */
+	int first;   /* first code of length len */
+	int count;   /* number of codes of length len */
+	int index;   /* index of first code of length len in symbol table */
+	int bitbuf;  /* bits from stream */
+	int left;    /* bits left in next or left to process */
+	short *next; /* next number of codes */
+
+	bitbuf = s->bitbuf;
+	left = s->bitcnt;
+	code = first = index = 0;
+	len = 1;
+	next = h->count + 1; /* count[0] is not used for decoding */
+	while (1) {
+		while (left--) {
+			code |= bitbuf & 1;
+			bitbuf >>= 1;
+			count = *next++;
+			if (code < first + count) {
+				/* if length len, return symbol */
+				s->bitbuf = bitbuf;
+				/* bits still unused in the last byte fetched */
+				s->bitcnt = (s->bitcnt - len) & 7;
+				return h->symbol[index + (code - first)];
+			}
+			index += count; /* else update for next length */
+			first += count;
+			first <<= 1;
+			code <<= 1;
+			len++;
+		}
+		left = (MAXBITS+1) - len; /* code bits that may still follow */
+		if (left == 0)
+			break;
+		if (s->incnt == s->inlen) {
+			pufferror = -1; /* out of input */
+			return -1;
+		}
+
+		bitbuf = s->in[s->incnt++]; /* fetch the next input byte */
+		if (left > 8)
+			left = 8;
+	}
+	return -9; /* ran out of codes */
+}
+#endif /* SLOW */
+
+/*
+ * Given the list of code lengths length[0..n-1] representing a canonical
+ * Huffman code for n symbols, construct the tables required to decode those
+ * codes.  Those tables are the number of codes of each length, and the symbols
+ * sorted by length, retaining their original order within each length.  The
+ * return value is zero for a complete code set, negative for an over-
+ * subscribed code set, and positive for an incomplete code set.  The tables
+ * can be used if the return value is zero or positive, but they cannot be used
+ * if the return value is negative.  If the return value is zero, it is not
+ * possible for decode() using that table to return an error--any stream of
+ * enough bits will resolve to a symbol.  If the return value is positive, then
+ * it is possible for decode() using that table to return an error for received
+ * codes past the end of the incomplete lengths.
+ *
+ * Not used by decode(), but used for error checking, h->count[0] is the number
+ * of the n symbols not in the code.  So n - h->count[0] is the number of
+ * codes.  This is useful for checking for incomplete codes that have more than
+ * one symbol, which is an error in a dynamic block.
+ *
+ * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS
+ * This is assured by the construction of the length arrays in dynamic() and
+ * fixed() and is not verified by construct().
+ *
+ * Format notes:
+ *
+ * - Permitted and expected examples of incomplete codes are one of the fixed
+ *   codes and any code with a single symbol which in deflate is coded as one
+ *   bit instead of zero bits.  See the format notes for fixed() and dynamic().
+ *
+ * - Within a given code length, the symbols are kept in ascending order for
+ *   the code bits definition.
+ */
+static int
+construct(struct huffman *h, short *length, int n)
+{
+	int symbol; /* current symbol when stepping through length[] */
+	int len;    /* current length when stepping through h->count[] */
+	int left;   /* number of possible codes left of current length */
+	short offs[MAXBITS+1]; /* offsets in symbol table for each length */
+
+	/* count number of codes of each length */
+	for (len = 0; len <= MAXBITS; len++)
+		h->count[len] = 0;
+
+	/* assumes 0 <= length[symbol] <= MAXBITS; this is not checked here */
+	for (symbol = 0; symbol < n; symbol++)
+		(h->count[length[symbol]])++;
+	if (h->count[0] == n) /* no codes! */
+		return 0;     /* complete, but decode() will fail */
+
+	/* check for an over-subscribed or incomplete set of lengths */
+	left = 1; /* one possible code of zero length */
+	for (len = 1; len <= MAXBITS; len++) {
+		left <<= 1;            /* one more bit, double codes left */
+		left -= h->count[len]; /* deduct count from possible codes */
+		if (left < 0)
+			return left; /* over-subscribed--return negative */
+	} /* left > 0 means incomplete */
+
+	/* generate offsets into symbol table for each length for sorting */
+	offs[1] = 0;
+	for (len = 1; len < MAXBITS; len++)
+		offs[len + 1] = offs[len] + h->count[len];
+
+	/*
+	 * put symbols in table sorted by length, by symbol order within each
+	 * length (symbols of length zero are left out of the table)
+	 */
+	for (symbol = 0; symbol < n; symbol++)
+		if (length[symbol] != 0)
+			h->symbol[offs[length[symbol]]++] = symbol;
+
+	/* return zero for complete set, positive for incomplete set */
+	return left;
+}
+
+/*
+ * Decode literal/length and distance codes until an end-of-block code.
+ *
+ * Format notes:
+ *
+ * - Compressed data that is after the block type if fixed or after the code
+ *   description if dynamic is a combination of literals and length/distance
+ *   pairs terminated by an end-of-block code.  Literals are simply Huffman
+ *   coded bytes.  A length/distance pair is a coded length followed by a
+ *   coded distance to represent a string that occurs earlier in the
+ *   uncompressed data that occurs again at the current location.
+ *
+ * - Literals, lengths, and the end-of-block code are combined into a single
+ *   code of up to 286 symbols.  They are 256 literals (0..255), 29 length
+ *   symbols (257..285), and the end-of-block symbol (256).
+ *
+ * - There are 256 possible lengths (3..258), and so 29 symbols are not enough
+ *   to represent all of those.  Lengths 3..10 and 258 are in fact represented
+ *   by just a length symbol.  Lengths 11..257 are represented as a symbol and
+ *   some number of extra bits that are added as an integer to the base length
+ *   of the length symbol.  The number of extra bits is determined by the base
+ *   length symbol.  These are in the static arrays below, lens[] for the base
+ *   lengths and lext[] for the corresponding number of extra bits.
+ *
+ * - The reason that 258 gets its own symbol is that the longest length is used
+ *   often in highly redundant files.  Note that 258 can also be coded as the
+ *   base value 227 plus the maximum extra value of 31.  While a good deflate
+ *   should never do this, it is not an error, and should be decoded properly.
+ *
+ * - If a length is decoded, including its extra bits if any, then it is
+ *   followed by a distance code.  There are up to 30 distance symbols.  Again
+ *   there are many more possible distances (1..32768), so extra bits are added
+ *   to a base value represented by the symbol.  The distances 1..4 get their
+ *   own symbol, but the rest require extra bits.  The base distances and
+ *   corresponding number of extra bits are below in the static arrays dist[]
+ *   and dext[].
+ *
+ * - Literal bytes are simply written to the output.  A length/distance pair is
+ *   an instruction to copy previously uncompressed bytes to the output.  The
+ *   copy is from distance bytes back in the output stream, copying for length
+ *   bytes.
+ *
+ * - Distances pointing before the beginning of the output data are not
+ *   permitted.
+ *
+ * - Overlapped copies, where the length is greater than the distance, are
+ *   allowed and common.  For example, a distance of one and a length of 258
+ *   simply copies the last byte 258 times.  A distance of four and a length of
+ *   twelve copies the last four bytes three times.  A simple forward copy
+ *   ignoring whether the length is greater than the distance or not implements
+ *   this correctly.  You should not use memcpy() since its behavior is not
+ *   defined for overlapped arrays.  You should not use memmove() or bcopy()
+ *   since though their behavior -is- defined for overlapping arrays, it is
+ *   defined to do the wrong thing in this case.
+ */
+static int
+codes(struct state *s,
+      struct huffman *lencode,
+      struct huffman *distcode)
+{
+	int symbol;         /* decoded symbol */
+	int len;            /* length for copy */
+	unsigned dist;      /* distance for copy */
+	/* Size base for length codes 257-285 */
+	static const short lens[29] = {
+		3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+		35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
+	/* Extra bits for length codes 257-285 */
+	static const short lext[29] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+		3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
+	/* Offset base for distance codes 0-29 */
+	static const short dists[30] = {
+		1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+		257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+		8193, 12289, 16385, 24577};
+	/* Extra bits for distance codes 0-29 */
+	static const short dext[30] = {
+		0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+		7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+		12, 12, 13, 13};
+
+	/* decode literals and length/distance pairs */
+	do {
+		symbol = decode(s, lencode);
+		if (symbol < 0)
+			return symbol; /* invalid symbol */
+		if (symbol < 256) {            /* literal: symbol is the byte */
+			/* write out the literal */
+			if (s->out != NIL) {
+				if (s->outcnt == s->outlen)
+					return 1; /* output buffer full */
+				s->out[s->outcnt] = symbol;
+			}
+			s->outcnt++;
+		} else if (symbol > 256) {        /* length */
+			/* get and compute length */
+			symbol -= 257;
+			if (symbol >= 29)
+				return -9;        /* invalid fixed code */
+			len = lens[symbol] + bits(s, lext[symbol]);
+			if (pufferror)
+				return -1; /* out of input in bits() */
+
+			/* get and check distance */
+			symbol = decode(s, distcode);
+			if (symbol < 0)
+				return symbol;      /* invalid symbol */
+			dist = dists[symbol] + bits(s, dext[symbol]);
+			if (pufferror)
+				return -1; /* out of input in bits() */
+
+			if (dist > s->outcnt)
+				return -10;     /* distance too far back */
+
+			/* copy length bytes from distance bytes back */
+			if (s->out != NIL) {
+				if (s->outcnt + len > s->outlen)
+					return 1; /* output buffer full */
+				while (len--) {
+					s->out[s->outcnt] =
+						s->out[s->outcnt - dist];
+					s->outcnt++;
+				}
+			} else
+				s->outcnt += len; /* NIL output: only count */
+		}
+	} while (symbol != 256);            /* end of block symbol */
+
+	/* done with a valid fixed or dynamic block */
+	return 0;
+}
+
+/*
+ * Process a fixed codes block.
+ *
+ * Format notes:
+ *
+ * - This block type can be useful for compressing small amounts of data for
+ *   which the size of the code descriptions in a dynamic block exceeds the
+ *   benefit of custom codes for that block.  For fixed codes, no bits are
+ *   spent on code descriptions.  Instead the code lengths for literal/length
+ *   codes and distance codes are fixed.  The specific lengths for each symbol
+ *   can be seen in the "for" loops below.
+ *
+ * - The literal/length code is complete, but has two symbols that are invalid
+ *   and should result in an error if received.  This cannot be implemented
+ *   simply as an incomplete code since those two symbols are in the "middle"
+ *   of the code.  They are eight bits long and the longest literal/length
+ *   code is nine bits.  Therefore the code must be constructed with those
+ *   symbols, and the invalid symbols must be detected after decoding.
+ *
+ * - The fixed distance codes also have two invalid symbols that should result
+ *   in an error if received.  Since all of the distance codes are the same
+ *   length, this can be implemented as an incomplete code.  Then the invalid
+ *   codes are detected while decoding.
+ */
+static int
+fixed(struct state *s)
+{
+	/*
+	 * lencnt/lensym/distcnt/distsym are file-scope tables that dynamic()
+	 * also fills in, so a fixed code built by an earlier call cannot be
+	 * assumed to still be there: a dynamic block (in this stream or in a
+	 * previous gunzip() call) overwrites it.  Rebuild the fixed tables on
+	 * every call instead of caching them behind a "first call" flag.
+	 * NOTE(review): lensym must hold FIXLCODES entries -- confirm at its
+	 * definition.
+	 */
+	struct huffman lencode = {lencnt, lensym};
+	struct huffman distcode = {distcnt, distsym};
+	short lengths[FIXLCODES];
+	int symbol;
+
+	/* literal/length table: 0..143 -> 8 bits, 144..255 -> 9 bits,
+	 * 256..279 -> 7 bits, 280..FIXLCODES-1 -> 8 bits */
+	for (symbol = 0; symbol < 144; symbol++)
+		lengths[symbol] = 8;
+	for (; symbol < 256; symbol++)
+		lengths[symbol] = 9;
+	for (; symbol < 280; symbol++)
+		lengths[symbol] = 7;
+	for (; symbol < FIXLCODES; symbol++)
+		lengths[symbol] = 8;
+	construct(&lencode, lengths, FIXLCODES);
+
+	/* distance table: every distance symbol uses 5 bits; the return
+	 * value is ignored since this code is incomplete by design */
+	for (symbol = 0; symbol < MAXDCODES; symbol++)
+		lengths[symbol] = 5;
+	construct(&distcode, lengths, MAXDCODES);
+
+	/* decode data until end-of-block code */
+	return codes(s, &lencode, &distcode);
+}
+
+/*
+ * Process a dynamic codes block.
+ *
+ * Format notes:
+ *
+ * - A dynamic block starts with a description of the literal/length and
+ *   distance codes for that block.  New dynamic blocks allow the compressor to
+ *   rapidly adapt to changing data with new codes optimized for that data.
+ *
+ * - The codes used by the deflate format are "canonical", which means that
+ *   the actual bits of the codes are generated in an unambiguous way simply
+ *   from the number of bits in each code.  Therefore the code descriptions
+ *   are simply a list of code lengths for each symbol.
+ *
+ * - The code lengths are stored in order for the symbols, so lengths are
+ *   provided for each of the literal/length symbols, and for each of the
+ *   distance symbols.
+ *
+ * - If a symbol is not used in the block, this is represented by a zero as
+ *   the code length.  This does not mean a zero-length code, but rather
+ *   that no code should be created for this symbol.  There is no way in the
+ *   deflate format to represent a zero-length code.
+ *
+ * - The maximum number of bits in a code is 15, so the possible lengths for
+ *   any code are 1..15.
+ *
+ * - The fact that a length of zero is not permitted for a code has an
+ *   interesting consequence.  Normally if only one symbol is used for a given
+ *   code, then in fact that code could be represented with zero bits.  However
+ *   in deflate, that code has to be at least one bit.  So for example, if
+ *   only a single distance base symbol appears in a block, then it will be
+ *   represented by a single code of length one, in particular one 0 bit.  This
+ *   is an incomplete code, since if a 1 bit is received, it has no meaning,
+ *   and should result in an error.  So incomplete distance codes of one symbol
+ *   should be permitted, and the receipt of invalid codes should be handled.
+ *
+ * - It is also possible to have a single literal/length code, but that code
+ *   must be the end-of-block code, since every dynamic block has one.  This
+ *   is not the most efficient way to create an empty block (an empty fixed
+ *   block is fewer bits), but it is allowed by the format.  So incomplete
+ *   literal/length codes of one symbol should also be permitted.
+ *
+ * - If there are only literal codes and no lengths, then there are no distance
+ *   codes.  This is represented by one distance code with zero bits.
+ *
+ * - The list of up to 286 length/literal lengths and up to 30 distance lengths
+ *   are themselves compressed using Huffman codes and run-length encoding.  In
+ *   the list of code lengths, a 0 symbol means no code, a 1..15 symbol means
+ *   that length, and the symbols 16, 17, and 18 are run-length instructions.
+ *   Each of 16, 17, and 18 is followed by extra bits to define the length of
+ *   the run.  16 copies the last length 3 to 6 times.  17 represents 3 to 10
+ *   zero lengths, and 18 represents 11 to 138 zero lengths.  Unused symbols
+ *   are common, hence the special coding for zero lengths.
+ *
+ * - The symbols for 0..18 are Huffman coded, and so that code must be
+ *   described first.  This is simply a sequence of up to 19 three-bit values
+ *   representing no code (0) or the code length for that symbol (1..7).
+ *
+ * - A dynamic block starts with three fixed-size counts from which is computed
+ *   the number of literal/length code lengths, the number of distance code
+ *   lengths, and the number of code length code lengths (ok, you come up with
+ *   a better name!) in the code descriptions.  For the literal/length and
+ *   distance codes, lengths after those provided are considered zero, i.e. no
+ *   code.  The code length code lengths are received in a permuted order (see
+ *   the order[] array below) to make a short code length code length list more
+ *   likely.  As it turns out, very short and very long codes are less likely
+ *   to be seen in a dynamic code description, hence what may appear initially
+ *   to be a peculiar ordering.
+ *
+ * - Given the number of literal/length code lengths (nlen) and distance code
+ *   lengths (ndist), then they are treated as one long list of nlen + ndist
+ *   code lengths.  Therefore run-length coding can and often does cross the
+ *   boundary between the two sets of lengths.
+ *
+ * - So to summarize, the code description at the start of a dynamic block is
+ *   three counts for the number of code lengths for the literal/length codes,
+ *   the distance codes, and the code length codes.  This is followed by the
+ *   code length code lengths, three bits each.  This is used to construct the
+ *   code length code which is used to read the remainder of the lengths.  Then
+ *   the literal/length code lengths and distance lengths are read as a single
+ *   set of lengths using the code length codes.  Codes are constructed from
+ *   the resulting two sets of lengths, and then finally you can start
+ *   decoding actual compressed data in the block.
+ *
+ * - For reference, a "typical" size for the code description in a dynamic
+ *   block is around 80 bytes.
+ */
+static int
+dynamic(struct state *s)
+{
+	int nlen, ndist, ncode;  /* number of dyn_lengths in descriptor */
+	int index;               /* index of dyn_lengths[] */
+	int err;                 /* construct() return value */
+	struct huffman lencode = {lencnt, lensym};    /* length code */
+	struct huffman distcode = {distcnt, distsym}; /* distance code */
+	static const short order[19] = /* permutation of code length codes */
+		{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14,
+		 1, 15};
+
+	/* get number of dyn_lengths in each table, check dyn_lengths */
+	nlen = bits(s, 5) + 257;
+	ndist = bits(s, 5) + 1;
+	ncode = bits(s, 4) + 4;
+	if (pufferror)
+		return -1;
+
+	if (nlen > MAXLCODES || ndist > MAXDCODES)
+		return -3;                      /* bad counts */
+
+	/* read code length code dyn_lengths (really), missing dyn_lengths are zero */
+	for (index = 0; index < ncode; index++) {
+		dyn_lengths[order[index]] = bits(s, 3);
+		if (pufferror)
+			return -1;
+	}
+	for (; index < 19; index++)
+		dyn_lengths[order[index]] = 0;
+
+	/* build huffman table for code dyn_lengths codes (use lencode
+	 * temporarily) */
+	err = construct(&lencode, dyn_lengths, 19);
+	if (err != 0)
+		return -4;            /* require complete code set here */
+
+	/* read length/literal and distance code length tables */
+	index = 0;
+	while (index < nlen + ndist) {
+		int symbol;             /* decoded value */
+		int len;                /* last length to repeat */
+
+		/* A failed decode must never be stored as a code length. */
+		symbol = decode(s, &lencode);
+		if (pufferror)
+			return -1;
+		if (symbol < 0)
+			return symbol;      /* invalid symbol */
+		if (symbol < 16)            /* length in 0..15 */
+			dyn_lengths[index++] = symbol;
+		else {                      /* repeat instruction */
+			len = 0;            /* assume repeating zeros */
+			if (symbol == 16) { /* repeat last length 3..6 times */
+				if (index == 0)
+					return -5;      /* no last length! */
+				len = dyn_lengths[index - 1]; /* last length */
+				symbol = 3 + bits(s, 2);
+			} else if (symbol == 17) { /* repeat zero 3..10 times */
+				symbol = 3 + bits(s, 3);
+			} else { /* == 18, repeat zero 11..138 times */
+				symbol = 11 + bits(s, 7);
+			}
+			if (pufferror)          /* bits() flagged an error */
+				return -1;
+			if (index + symbol > nlen + ndist)
+				return -6; /* too many dyn_lengths! */
+			while (symbol--) /* repeat last or zero symbol times */
+				dyn_lengths[index++] = len;
+		}
+	}
+
+	/* build huffman table for literal/length codes */
+	err = construct(&lencode, dyn_lengths, nlen);
+	if (err < 0 || (err > 0 && nlen - lencode.count[0] != 1))
+		return -7; /* only allow incomplete codes if just one code */
+
+	/* build huffman table for distance codes */
+	err = construct(&distcode, dyn_lengths + nlen, ndist);
+	if (err < 0 || (err > 0 && ndist - distcode.count[0] != 1))
+		return -8; /* only allow incomplete codes if just one code */
+
+	/* decode data until end-of-block code */
+	return codes(s, &lencode, &distcode);
+}
+
+/*
+ * Inflate source to dest.  On return, destlen and sourcelen are updated to the
+ * size of the uncompressed data and the size of the deflate data respectively.
+ * On success, the return value of gunzip() is zero.  If there is an error in the
+ * source data, i.e. it is not in the deflate format, then a negative value is
+ * returned.  If there is not enough input available or there is not enough
+ * output space, then a positive error is returned.  In that case, destlen and
+ * sourcelen are not updated to facilitate retrying from the beginning with the
+ * provision of more input data or more output space.  In the case of invalid
+ * inflate data (a negative error), the dest and source pointers are updated to
+ * facilitate the debugging of deflators.
+ *
+ * gunzip() also has a mode to determine the size of the uncompressed output with
+ * no output written.  For this dest must be (unsigned char *)0.  In this case,
+ * the input value of *destlen is ignored, and on return *destlen is set to the
+ * size of the uncompressed output.
+ *
+ * The return codes are:
+ *
+ *   2:  available inflate data did not terminate
+ *   1:  output space exhausted before completing inflate
+ *   0:  successful inflate
+ *  -1:  invalid block type (type == 3)
+ *  -2:  stored block length did not match one's complement
+ *  -3:  dynamic block code description: too many length or distance codes
+ *  -4:  dynamic block code description: code lengths codes incomplete
+ *  -5:  dynamic block code description: repeat lengths with no first length
+ *  -6:  dynamic block code description: repeat more than specified lengths
+ *  -7:  dynamic block code description: invalid literal/length code lengths
+ *  -8:  dynamic block code description: invalid distance code lengths
+ *  -9:  invalid literal/length or distance code in fixed or dynamic block
+ * -10:  distance is too far back in fixed or dynamic block
+ *
+ * Format notes:
+ *
+ * - Three bits are read for each block to determine the kind of block and
+ *   whether or not it is the last block.  Then the block is decoded and the
+ *   process repeated if it was not the last block.
+ *
+ * - The leftover bits in the last byte of the deflate data after the last
+ *   block (if it was a fixed or dynamic block) are undefined and have no
+ *   expected values to check.
+ */
+int
+gunzip(unsigned char *dest,      /* destination pointer */
+       unsigned long *destlen,   /* amount of output space */
+       unsigned char *source,    /* source data pointer */
+       unsigned long *sourcelen) /* amount of input available */
+{
+	struct state st;            /* decoder input/output bookkeeping */
+	int final;                  /* non-zero once the last block is seen */
+	int btype;                  /* two-bit block type */
+	int rc;                     /* result of the block decoder */
+
+	/* start with the global error flag cleared */
+	pufferror = 0;
+
+	/* output side: nothing written yet */
+	st.out = dest;
+	st.outlen = *destlen;       /* ignored if dest is NIL */
+	st.outcnt = 0;
+
+	/* input side: nothing consumed, bit buffer empty */
+	st.in = source;
+	st.inlen = *sourcelen;
+	st.incnt = 0;
+	st.bitbuf = 0;
+	st.bitcnt = 0;
+
+	/* one pass per block; stop after the last block or on an error */
+	do {
+		/* block header, first bit: set on the last block */
+		final = bits(&st, 1);
+		if (pufferror)
+			return 2;
+
+		/* block header, next two bits: block type 0..3 */
+		btype = bits(&st, 2);
+		if (pufferror)
+			return 2;
+
+		/* hand the block body to the matching decoder */
+		if (btype == 0)
+			rc = stored(&st);
+		else if (btype == 1)
+			rc = fixed(&st);
+		else if (btype == 2)
+			rc = dynamic(&st);
+		else
+			rc = -1;        /* Invalid */
+
+		/* a flagged error overrides whatever the decoder returned */
+		if (pufferror)
+			return 2;
+	} while (rc == 0 && !final);
+
+	/*
+	 * A positive result leaves both lengths as the caller passed them;
+	 * success and invalid-data results report the byte counts reached.
+	 */
+	if (rc > 0)
+		return rc;
+
+	*destlen = st.outcnt;
+	*sourcelen = st.incnt;
+	return rc;
+}
diff --git a/gunzip.h b/gunzip.h
new file mode 100644
index 0000000..6c4d325
--- /dev/null
+++ b/gunzip.h
@@ -0,0 +1,30 @@
+/* gunzip.h
+  Copyright (C) 2002, 2003 Mark Adler, all rights reserved
+  version 1.7, 3 Mar 2002
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the author be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Mark Adler    madler@alumni.caltech.edu
+ */
+
+#define GUNZIP_COMP_BLOCK_OFFSET 0x0A /* Offset of compressed block when no
+				       * flags are set in the GZIP file. */
+/* Inflate source into dest: 0 = success, < 0 = bad data, > 0 = ran short. */
+int gunzip(unsigned char *dest,       /* destination pointer, 0 = size only */
+	   unsigned long *destlen,    /* in: output space, out: output size */
+	   unsigned char *source,     /* source data pointer */
+	   unsigned long *sourcelen); /* in: input available, out: input used */
diff --git a/nand.c b/nand.c
new file mode 100644
index 0000000..bede077
--- /dev/null
+++ b/nand.c
@@ -0,0 +1,975 @@
+/*
+ * nand.c - NAND flash functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "util.h"
+#include "uart.h"
+#include "nand.h"
+
+/* Bus width codes, compared against nand_info.bus_width */
+#define BUS_8BIT    0x01
+#define BUS_16BIT   0x02
+#define BUS_32BIT   0x04
+
+/* Offsets from nand_info.base_addr for data, address (ALE) and command (CLE) */
+#define NAND_DATA_OFFSET    0x00
+#define NAND_ALE_OFFSET     0x08
+#define NAND_CLE_OFFSET     0x10
+
+#define NAND_TIMEOUT       20480 /* poll count handed to the wait functions */
+
+/* NAND flash commands (values written through flash_write_cmd()) */
+#define NAND_LO_PAGE        0x00
+#define NAND_HI_PAGE        0x01
+#define NAND_LOCK           0x2A
+#define NAND_UNLOCK_START   0x23
+#define NAND_UNLOCK_END     0x24
+#define NAND_READ_30H       0x30
+#define NAND_EXTRA_PAGE     0x50
+#define	NAND_RDID           0x90
+#define NAND_RDIDADD        0x00
+#define	NAND_RESET          0xFF
+#define	NAND_PGRM_START     0x80
+#define NAND_PGRM_END       0x10
+#define NAND_RDY            0x40
+#define	NAND_PGM_FAIL       0x01
+#define	NAND_BERASEC1       0x60
+#define	NAND_BERASEC2       0xD0
+#define	NAND_STATUS         0x70
+
+/* Bit masks for AEMIF->NANDFSR and for the value read after NAND_STATUS */
+#define NAND_NANDFSR_READY	0x01
+#define NAND_STATUS_WRITEREADY 	0xC0
+#define NAND_STATUS_ERROR	0x01
+#define NAND_STATUS_BUSY	0x40
+
+#define UNKNOWN_NAND		0xFF /* Unknown device id */
+
+/* Page size in bytes without the spare bytes: clears the low 8 bits of x. */
+#define NANDFLASH_PAGESIZE(x) (((x) >> 8) << 8)
+
+union flash_data {	/* one value, accessed at the width of the bus in use */
+	uint8_t c;
+	uint16_t w;
+	uint32_t l;
+};
+
+union flash_ptr {	/* one address, typed for 8-, 16- or 32-bit accesses */
+	volatile uint8_t *cp;
+	volatile uint16_t *wp;
+	volatile uint32_t *lp;
+};
+
+struct nand_dev_infos_t {	/* one row of the nand_dev_infos[] table */
+	uint8_t   id;              /* Device ID (0x00 marks the end of table) */
+	uint16_t  num_blocks;      /* Number of blocks */
+	uint8_t   pages_per_block; /* Number of pages per block */
+	uint16_t  bytes_per_page;  /* Number of bytes per page (with spare) */
+};
+
+struct nand_info_t {
+	uint32_t base_addr;        /* Base address of NAND CS memory space. */
+	int      bus_width;        /* Bus width: BUS_8BIT or BUS_16BIT */
+	int      id;               /* Device ID of the matched table entry. */
+	int      num_blocks;       /* Number of blocks */
+	int      pages_per_block;  /* Number of pages per block */
+	int      bytes_per_page;   /* Number of bytes per page (without spare) */
+	int      num_cab;          /* Number of Column address cycles */
+	int      num_rab;          /* Number of Row address cycles */
+	uint32_t ecc_mask;         /* Mask for ECC register */
+	int      large_page;       /* True if page size >= 2048 bytes */
+	int      ecc_index;        /* ECC position is different for small and
+				    * large page devices. */
+	int      chunk_size;       /* Always read/write in 512 bytes chunk max.
+				    * This will be set based on page size. */
+	int      spare_bytes;      /* Number of spare area bytes per page. */
+	int      blk_addr_shift;   /* Number of bits by which to shift block address */
+	int      page_addr_shift;  /* Number of bits by which to shift page address */
+	int      cs_offset;        /*
+				    * Chip-select offset:
+				    *   0 = CS2 space
+				    *   1 = CS3 space
+				    *   2 = CS4 space
+				    *   3 = CS5 space
+				    */
+};
+
+/* Buffer for storing data read from NAND flash (linker section ".ddrram") */
+static uint8_t read_buf[MAX_PAGE_SIZE] __attribute__((section(".ddrram")));
+
+/* Symbol from linker script */
+extern uint32_t __NANDFlash;
+
+/* Details about the NAND device itself; geometry set by nand_get_details() */
+static volatile struct nand_info_t nand_info;
+
+/* Table of ROM supported NAND devices, searched by nand_get_details() */
+static const struct nand_dev_infos_t nand_dev_infos[] = {
+	/* id, num_blocks, pages_per_block, bytes_per_page (data + spare) */
+	{0x6E,   256, 16, 256+8},   /*   1 MB */
+	{0x68,   256, 16, 256+8},   /*   1 MB */
+	{0xEC,   256, 16, 256+8},   /*   1 MB */
+	{0xE8,   256, 16, 256+8},   /*   1 MB */
+	{0xEA,   512, 16, 256+8},   /*   2 MB */
+	{0xE3,   512, 16, 512+16},  /*   4 MB */
+	{0xE5,   512, 16, 512+16},  /*   4 MB */
+	{0xE6,  1024, 16, 512+16},  /*   8 MB */
+
+	{0x39,  1024, 16, 512+16},  /*   8 MB */
+	{0x6B,  1024, 16, 512+16},  /*   8 MB */
+	{0x73,  1024, 32, 512+16},  /*  16 MB */
+	{0x33,  1024, 32, 512+16},  /*  16 MB */
+	{0x75,  2048, 32, 512+16},  /*  32 MB */
+	{0x35,  2048, 32, 512+16},  /*  32 MB */
+
+	{0x43,  1024, 32, 512+16},  /*  16 MB 0x1243 */
+	{0x45,  2048, 32, 512+16},  /*  32 MB 0x1245 */
+	{0x53,  1024, 32, 512+16},  /*  16 MB 0x1253 */
+	{0x55,  2048, 32, 512+16},  /*  32 MB 0x1255 */
+	{0x36,  4096, 32, 512+16},  /*  64 MB */
+	{0x46,  4096, 32, 512+16},  /*  64 MB 0x1346 */
+	{0x56,  4096, 32, 512+16},  /*  64 MB 0x1356 */
+
+	{0x76,  4096, 32, 512+16},  /*  64 MB */
+
+	{0x74,  8192, 32, 512+16},  /* 128 MB 0x1374 */
+	{0x79,  8192, 32, 512+16},  /* 128 MB */
+	{0x71, 16384, 32, 512+16},  /* 256 MB */
+	{0xF1,  1024, 64, 2048+64}, /* 128 MB - Big Block */
+	{0xA1,  1024, 64, 2048+64}, /* 128 MB - Big Block */
+	{0xAA,  2048, 64, 2048+64}, /* 256 MB - Big Block */
+	{0xDA,  2048, 64, 2048+64}, /* 256 MB - Big Block */
+	{0xDC,  4096, 64, 2048+64}, /* 512 MB - Big Block */
+	{0xAC,  4096, 64, 2048+64}, /* 512 MB - Big Block */
+	{0xB1,  1024, 64, 2048+64}, /* 128 MB - Big Block */
+	{0xC1,  1024, 64, 2048+64}, /* 128 MB - Big Block */
+	{0xD3,  4096, 64, 2048+64}, /* 512 MB - Big Block */
+	{0x00,	   0,  0,       0}  /* id 0x00 ends the table scan */
+};
+
+/* Turn a 32-bit base address plus byte offset into a byte pointer. */
+static volatile uint8_t *flash_make_addr(uint32_t baseAddr, uint32_t offset)
+{
+	return (volatile uint8_t *) (offset + baseAddr);
+}
+
+/* Write @data to the NAND window at @offset using the configured bus width. */
+static void
+flash_write_data(uint32_t offset, uint32_t data)
+{
+	volatile union flash_ptr target;
+	union flash_data value;
+	int width;
+
+	value.l = data;
+	target.cp = flash_make_addr(nand_info.base_addr, offset);
+	width = nand_info.bus_width;
+
+	if (width == BUS_8BIT)
+		*target.cp = value.c;
+	else if (width == BUS_16BIT)
+		*target.wp = value.w;
+}
+
+static void
+flash_write_cmd(uint32_t cmd)
+{
+	flash_write_data(NAND_CLE_OFFSET, cmd); /* command goes to the CLE offset */
+}
+
+static void
+flash_write_addr(uint32_t addr)
+{
+	flash_write_data(NAND_ALE_OFFSET, addr); /* address goes to the ALE offset */
+}
+
+static void
+flash_write_bytes(const uint8_t *src, uint32_t numBytes)
+{
+	volatile union flash_ptr port, from;
+	uint32_t left;
+	/* Every store goes to the same data-port address; only src advances. */
+	from.cp = (volatile uint8_t *) src;
+	port.cp = flash_make_addr(nand_info.base_addr, NAND_DATA_OFFSET);
+	switch (nand_info.bus_width) {
+	case BUS_8BIT:
+		for (left = numBytes; left != 0; left--)
+			*port.cp = *from.cp++;
+		break;
+	case BUS_16BIT:	/* numBytes >> 1 words, so an odd last byte is not sent */
+		for (left = numBytes >> 1; left != 0; left--)
+			*port.wp = *from.wp++;
+		break;
+	}
+}
+
+static void
+flash_write_addr_bytes(uint32_t numAddrBytes, uint32_t addr)
+{
+	/* send the address least significant byte first, one cycle per byte */
+	for (; numAddrBytes != 0; numAddrBytes--, addr >>= 8)
+		flash_write_addr(addr & 0xff);
+}
+
+static void
+flash_write_row_addr_bytes(uint32_t block, uint32_t page)
+{
+	/* width of the page-within-block field of the row address */
+	int page_bits = nand_info.blk_addr_shift - nand_info.page_addr_shift;
+
+	flash_write_addr_bytes(nand_info.num_rab, (block << page_bits) | page);
+}
+
+static void
+flash_write_addr_cycles(uint32_t block, uint32_t page)
+{
+	flash_write_addr_bytes(nand_info.num_cab, 0x00000000); /* column = 0 */
+	flash_write_row_addr_bytes(block, page); /* then the row address */
+}
+
+static uint32_t
+flash_read_data(void)
+{
+	volatile union flash_ptr port;
+	union flash_data value;
+	int width;
+
+	/* bytes the bus does not deliver stay zero in the returned word */
+	value.l = 0x0;
+	port.cp = flash_make_addr(nand_info.base_addr, NAND_DATA_OFFSET);
+	width = nand_info.bus_width;
+	if (width == BUS_8BIT)
+		value.c = *port.cp;
+	else if (width == BUS_16BIT)
+		value.w = *port.wp;
+
+	return value.l;
+}
+
+static void
+flash_read_bytes(uint8_t *dest, uint32_t numBytes)
+{
+	volatile union flash_ptr into, port;
+	uint32_t left;
+	/* Every load comes from the same data-port address; only dest advances. */
+	into.cp = (volatile uint8_t *) dest;
+	port.cp = flash_make_addr(nand_info.base_addr, NAND_DATA_OFFSET);
+	switch (nand_info.bus_width) {
+	case BUS_8BIT:
+		for (left = numBytes; left != 0; left--)
+			*into.cp++ = *port.cp;
+		break;
+	case BUS_16BIT:	/* numBytes >> 1 words, so an odd last byte is not read */
+		for (left = numBytes >> 1; left != 0; left--)
+			*into.wp++ = *port.wp;
+		break;
+	}
+}
+
+/* Poll the ready bit of AEMIF->NANDFSR at most @timeout (> 0) times; returns
+ * E_PASS once the bit is set and E_FAIL if it never was. */
+static int
+nand_wait_for_ready(uint32_t timeout)
+{
+	volatile uint32_t cnt = timeout;
+	uint32_t ready;
+
+	waitloop(200);
+	do {
+		ready = AEMIF->NANDFSR & NAND_NANDFSR_READY;
+		cnt--;
+	} while ((cnt > 0) && !ready);
+	/* Test the bit, not cnt: cnt is also 0 when the last allowed poll
+	 * found the device ready, and that is not a timeout. */
+	if (!ready) {
+		log_info("NAND busy timeout");
+		return E_FAIL;
+	}
+	return E_PASS;
+}
+
+/* Poll the status register (NAND_STATUS command) up to @timeout (> 0) times.
+ * Needed besides nand_wait_for_ready(): on DM320 the Ready/Busy pin was
+ * reported not to work with all NANDs.  E_FAIL on timeout or error bit. */
+static int
+nand_wait_for_status(uint32_t timeout)
+{
+	volatile uint32_t cnt = timeout;
+	uint32_t status;
+
+	do {
+		flash_write_cmd(NAND_STATUS);
+		status = flash_read_data() &
+			(NAND_STATUS_ERROR | NAND_STATUS_BUSY);
+		cnt--;
+	} while ((cnt > 0) && !status);
+	if (!status) {	/* neither bit ever showed up: genuine timeout */
+		log_info("NAND status timeout");
+		return E_FAIL;
+	}
+	if (status & NAND_STATUS_ERROR) {	/* was reported as success */
+		log_info("NAND status error");
+		return E_FAIL;
+	}
+	return E_PASS;
+}
+
+/* Return the masked ECC register for this chip select, then restart the ECC. */
+static uint32_t
+nand_read_ecc(void)
+{
+	/* registers starting at NANDF1ECC are indexed by chip-select offset */
+	uint32_t *ecc_regs = (uint32_t *)(&(AEMIF->NANDF1ECC));
+	uint32_t ecc;
+
+	ecc = ecc_regs[nand_info.cs_offset];
+	ecc &= nand_info.ecc_mask;
+
+	waitloop(5);
+
+#ifdef NAND_DEBUG
+	uart_send_str("Value read from ECC register: ");
+	uart_send_hexnum(ecc, 8);
+	uart_send_lf();
+#endif
+
+	/* setting bit (8 + cs_offset) of NANDFCR starts a new calculation */
+	AEMIF->NANDFCR |= (1<<(8 + (nand_info.cs_offset)));
+	return ecc;
+}
+
+/* Get details of the NAND flash used from the id and the table of NAND
+ * devices.  Returns E_PASS on a table match, E_FAIL for an unknown id. */
+static int
+nand_get_details(void)
+{
+	uint32_t deviceID, i, j;
+
+	/* Issue device read ID command. */
+	flash_write_cmd(NAND_RDID);
+	flash_write_addr(NAND_RDIDADD);
+
+	/* Read four ID bytes; only the second one is kept, as the device ID */
+	j        = flash_read_data() & 0xFF;
+	deviceID = flash_read_data() & 0xFF;
+	j        = flash_read_data() & 0xFF;
+	j        = flash_read_data() & 0xFF;
+
+	uart_send_str("  ID:");
+	uart_send_hexnum(deviceID, 2);
+	if (nand_info.bus_width == BUS_16BIT)
+		uart_send_str(", 16");
+	else
+		uart_send_str(", 8");
+
+	log_info("-bit bus");
+
+	i = 0;
+	while (nand_dev_infos[i].id != 0x00) {
+		if (deviceID == nand_dev_infos[i].id) {
+			nand_info.id = (uint8_t) nand_dev_infos[i].id;
+			nand_info.pages_per_block =
+				nand_dev_infos[i].pages_per_block;
+			nand_info.num_blocks = nand_dev_infos[i].num_blocks;
+			nand_info.bytes_per_page =  NANDFLASH_PAGESIZE(
+				nand_dev_infos[i].bytes_per_page);
+
+			nand_info.spare_bytes = nand_dev_infos[i].bytes_per_page -
+				nand_info.bytes_per_page;
+
+			/* Configure small or large page device. */
+			if (nand_info.bytes_per_page >= 2048) {
+				/* Set the large page flag */
+				nand_info.large_page = true;
+				nand_info.ecc_index = 2;
+				nand_info.chunk_size = 512; /* Limit to 512 bytes */
+			} else {
+				/* Clear the large page flag */
+				nand_info.large_page = false;
+				nand_info.ecc_index = 0;
+				nand_info.chunk_size = nand_info.bytes_per_page;
+			}
+
+			/* Address shifts: j = floor(log2(pages_per_block)) */
+			j = 0;
+			while ((nand_info.pages_per_block >> j) > 1)
+				j++;
+
+			nand_info.blk_addr_shift = j;
+			nand_info.page_addr_shift = (nand_info.large_page) ? 16 : 8;
+
+			nand_info.blk_addr_shift += nand_info.page_addr_shift;
+
+			/* Set number of column address bytes needed */
+			nand_info.num_cab = nand_info.page_addr_shift >> 3;
+
+			j = 0; /* becomes floor(log2(num_blocks)) */
+			while ((nand_info.num_blocks >> j) > 1)
+				j++;
+
+			/* Row bytes: 3, 4 or 5 address bytes minus column bytes */
+			if ((nand_info.blk_addr_shift + j) <= 24)
+				nand_info.num_rab = 3 -
+					nand_info.num_cab;
+			else if ((nand_info.blk_addr_shift + j) <= 32)
+				nand_info.num_rab = 4 -
+					nand_info.num_cab;
+			else
+				nand_info.num_rab = 5 -
+					nand_info.num_cab;
+
+			/* Set the ECC bit mask */
+			if (nand_info.bytes_per_page < 512)
+				nand_info.ecc_mask = 0x07FF07FF;
+			else
+				nand_info.ecc_mask = 0x0FFF0FFF;
+
+			/* Report informations */
+			uart_send_str("  Blocks: ");
+			uart_send_hexnum(nand_info.num_blocks, 5);
+			uart_send_str(", Pages/block: ");
+			uart_send_hexnum(nand_info.pages_per_block, 3);
+			uart_send_str(", Bytes per page: ");
+			uart_send_hexnum(nand_info.bytes_per_page, 4);
+			uart_send_lf();
+
+			/* Report additional debug informations */
+#ifdef NAND_DEBUG
+			uart_send_str("  Page shift: ");
+			uart_send_hexnum(nand_info.page_addr_shift, 2);
+			uart_send_lf();
+			uart_send_str("  Block shift: ");
+			uart_send_hexnum(nand_info.blk_addr_shift, 2);
+			uart_send_lf();
+			uart_send_str("  Column address bytes: ");
+			uart_send_hexnum(nand_info.num_cab, 2);
+			uart_send_lf();
+			uart_send_str("  Row address bytes: ");
+			uart_send_hexnum(nand_info.num_rab, 2);
+			uart_send_lf();
+			uart_send_str("  ECC mask: ");
+			uart_send_hexnum(nand_info.ecc_mask, 8);
+			uart_send_lf();
+#endif
+
+			return E_PASS;
+		}
+		i++;
+	}
+	/* No match was found for the device ID */
+	return E_FAIL;
+}
+
+/* Send the spare-area bytes for one chunk, with @eccvalue in the ECC slot. */
+static void
+nand_write_spare(uint32_t eccvalue)
+{
+	uint32_t spare[4];
+	uint32_t spare_len;
+	int word;
+
+	/* all-ones filler everywhere ... */
+	for (word = 0; word < 4; word++)
+		spare[word] = 0xFFFFFFFF;
+
+	/* ... except the word where the RBL expects the ECC */
+	spare[nand_info.ecc_index] = eccvalue;
+
+	/* 256-byte pages get 8 spare bytes written, all others 16 */
+	spare_len = (nand_info.bytes_per_page == 256) ? 8 : 16;
+	flash_write_bytes((uint8_t *) spare, spare_len);
+}
+
+/*
+ * RBL-expected layout for large page NAND (ex: 2048 bytes/page):
+ *
+ *     DM35x          DM644x
+ * -----------------------------
+ *   512 DATA       2048 DATA
+ *    16 SPARE        64 SPARE
+ *   512 DATA
+ *    16 SPARE
+ *   512 DATA
+ *    16 SPARE
+ *   512 DATA
+ *    16 SPARE
+ *
+ * So for big block NAND devices (bytes per page > 512) on the DM35x, we must
+ * write 512 bytes and write the ECC immediately after that data, and repeat
+ * until all the page is written.
+ */
+
+/* Program one page: each data chunk is followed (DM35x) or the whole page is
+ * followed (DM644x) by spare bytes carrying the hardware ECC. */
+static int
+nand_write_page(uint32_t block, uint32_t page, const uint8_t *src)
+{
+	uint32_t ecc_words[4]; /* one ECC per 512-byte chunk, 2048 bytes max */
+	uint8_t chunks;        /* number of data chunks making up the page */
+	uint8_t n;
+
+	/* bytes_per_page / 512; smaller pages still take one write */
+	chunks = (nand_info.bytes_per_page >> 9);
+	if (chunks == 0)
+		chunks = 1;
+
+	/* program command, then the column and row address cycles */
+	flash_write_cmd(NAND_PGRM_START);
+	flash_write_addr_cycles(block, page);
+
+	/* result discarded: this call only restarts the ECC calculation */
+	nand_read_ecc();
+
+	n = 0;
+	while (n < chunks) {
+		/* send one chunk of data */
+		flash_write_bytes(src, nand_info.chunk_size);
+
+		/* fetch the ECC computed over it and format it */
+		ecc_words[n] = nand_read_ecc();
+		endian_data(&(ecc_words[n]));
+
+#if defined(DM35x)
+		/* DM35x: the spare bytes directly follow each chunk */
+		nand_write_spare(ecc_words[n]);
+#endif
+
+		/* advance to the next chunk of the source buffer */
+		src += nand_info.chunk_size;
+		n++;
+	}
+
+#if defined(DM644x)
+	/* DM644x: all spare areas follow the complete page data */
+	for (n = 0; n < chunks; n++)
+		nand_write_spare(ecc_words[n]);
+#endif
+
+	/* Write program end command */
+	flash_write_cmd(NAND_PGRM_END);
+
+	/* first wait on the ready bit ... */
+	if (nand_wait_for_ready(NAND_TIMEOUT) != E_PASS)
+		return E_FAIL;
+
+	/* ... then return what the status register poll reports */
+	return nand_wait_for_status(NAND_TIMEOUT);
+}
+
+/*
+ * Read the spare bytes that follow a chunk of page data and return the ECC
+ * word stored at nand_info.ecc_index, byte-swapped with endian_data().
+ *
+ * 8 spare bytes are read for 256 bytes/page devices, 16 otherwise.
+ */
+static uint32_t
+nand_read_spare(void)
+{
+	uint32_t spare_words[4];
+	uint32_t stored_ecc;
+	uint32_t spare_len = 16;
+
+	if (nand_info.bytes_per_page == 256)
+		spare_len = 8;
+
+	flash_read_bytes((uint8_t *) spare_words, spare_len);
+
+	/* Pick the word holding the ECC and format it */
+	stored_ecc = spare_words[nand_info.ecc_index];
+	endian_data(&stored_ecc);
+
+	return stored_ecc;
+}
+
+/*
+ * Read one page from NAND into dest.
+ *
+ * The page is fetched in nand_info.chunk_size pieces, one per 512 bytes of
+ * page size (at least one). The hardware ECC is read after each piece. The
+ * stored ECC is read from the spare bytes after each piece on DM35x, or after
+ * all the data on DM644x.
+ *
+ * Returns E_FAIL if the device does not become ready or, unless
+ * NAND_BYPASS_READ_PAGE_ECC_CHECK is defined, if a hardware ECC differs from
+ * the stored one. Otherwise returns the result of nand_wait_for_status().
+ */
+int
+nand_read_page(uint32_t block, uint32_t page, uint8_t *dest)
+{
+	uint32_t computed_ecc[4];
+	uint32_t stored_ecc[4];
+	uint8_t chunks, n;
+
+	/* Number of 512-byte units in a page, never less than one */
+	chunks = (nand_info.bytes_per_page >> 9);
+	if (chunks == 0)
+		chunks = 1;
+
+	/* Read command followed by the address cycles */
+	flash_write_cmd(NAND_LO_PAGE);
+	flash_write_addr_cycles(block, page);
+
+	/* Large page devices need an additional confirm command */
+	if (nand_info.large_page)
+		flash_write_cmd(NAND_READ_30H);
+
+	/* Wait for data to be available */
+	if (nand_wait_for_ready(NAND_TIMEOUT) != E_PASS)
+		return E_FAIL;
+
+	/* Value discarded: starts the ECC in NANDFCR for CS2 (bit no.8) */
+	nand_read_ecc();
+
+	n = 0;
+	while (n < chunks) {
+		/* Fetch one chunk of page data */
+		flash_read_bytes(dest, nand_info.chunk_size);
+
+		/* ECC computed by the hardware for that chunk */
+		computed_ecc[n] = nand_read_ecc();
+
+#if defined(DM35x)
+		/* Spare bytes directly follow each chunk */
+		stored_ecc[n] = nand_read_spare();
+#endif
+
+		dest += nand_info.chunk_size;
+		n++;
+	}
+
+#if defined(DM644x)
+	/* Spare bytes for all chunks follow the whole data area */
+	n = 0;
+	while (n < chunks) {
+		stored_ecc[n] = nand_read_spare();
+		n++;
+	}
+#endif
+
+#ifndef NAND_BYPASS_READ_PAGE_ECC_CHECK
+	for (n = 0; n < chunks; n++) {
+		if (computed_ecc[n] == stored_ecc[n])
+			continue;
+
+		/* Report the first mismatching pair and give up */
+		log_info("NAND ECC failure:");
+		uart_send_str("HW    = ");
+		uart_send_hexnum(computed_ecc[n], 8);
+		uart_send_lf();
+		uart_send_str("SPARE =");
+		uart_send_hexnum(stored_ecc[n], 8);
+		uart_send_lf();
+
+		return E_FAIL;
+	}
+#endif /* NAND_BYPASS_READ_PAGE_ECC_CHECK */
+
+	return nand_wait_for_status(NAND_TIMEOUT);
+}
+
+/*
+ * Verify a page by reading it back into read_buf and comparing it byte for
+ * byte with src.
+ *
+ * On the first mismatch the block, page and offset are reported, followed by
+ * a dump of the 32-bit words around the failing offset from both buffers.
+ * The dump window starts on a 4-byte boundary and is clamped to the page, so
+ * a mismatch near either end of the page never indexes outside the buffers.
+ *
+ * Returns E_PASS when the page matches, E_FAIL on read error or mismatch.
+ */
+static int
+nand_verify_page(int block, int page, const uint8_t *src)
+{
+	int page_size = nand_info.bytes_per_page;
+	int i, k, dump_end;
+
+	if (nand_read_page(block, page, read_buf) != E_PASS)
+		return E_FAIL;
+
+	for (i = 0; i < page_size; i++) {
+		if (src[i] == read_buf[i])
+			continue;
+
+		uart_send_str("NAND verify page failed at block ");
+		uart_send_hexnum(block, 4);
+		uart_send_str(", page ");
+		uart_send_hexnum(page, 4);
+		uart_send_str(", offset ");
+		uart_send_hexnum(i, 4);
+		uart_send_lf();
+
+		/*
+		 * Dump two words before the failing word and four after it.
+		 * Offsets are rounded down to a word boundary and clamped to
+		 * [0, page_size). The word reads below still assume that src
+		 * and read_buf themselves are 32-bit aligned.
+		 */
+		k = (i & ~3) - 8;
+		if (k < 0)
+			k = 0;
+		dump_end = (i & ~3) + 20;
+		if (dump_end > page_size)
+			dump_end = page_size;
+
+		for (; (k + 4) <= dump_end; k += 4) {
+			uart_send_str("offset ");
+			uart_send_hexnum(k, 4);
+			uart_send_str(", ram=");
+			uart_send_hexnum(*((const uint32_t *) &src[k]), 8);
+			uart_send_str(", nand=");
+			uart_send_hexnum(*((const uint32_t *) &read_buf[k]),
+					 8);
+			uart_send_lf();
+		}
+
+		return E_FAIL;
+	}
+
+	return E_PASS;
+}
+
+/*
+ * Unlock a range of NAND blocks.
+ *
+ * The range is announced on the UART before it is checked. Returns E_FAIL
+ * when the last block lies beyond nand_info.num_blocks, E_PASS once the
+ * unlock start/end commands have been issued.
+ */
+static uint32_t
+nand_unprotect_blocks(uint32_t startBlkNum, uint32_t blkCnt)
+{
+	uint32_t last_blk = startBlkNum + blkCnt - 1;
+
+	uart_send_str("Unprotecting blocks ");
+	uart_send_hexnum(startBlkNum, 4);
+	uart_send_str(" to ");
+	uart_send_hexnum(last_blk, 4);
+	uart_send_lf();
+
+	if (last_blk < nand_info.num_blocks) {
+		/* First block of the unlocked range */
+		flash_write_cmd(NAND_UNLOCK_START);
+		flash_write_row_addr_bytes(startBlkNum, 0);
+
+		/* Last block of the unlocked range */
+		flash_write_cmd(NAND_UNLOCK_END);
+		flash_write_row_addr_bytes(last_blk, 0);
+
+		return E_PASS;
+	}
+
+	log_fail("Invalid last block");
+	return E_FAIL;
+}
+
+/*
+ * Lock the NAND flash again: logs the action, then issues the NAND_LOCK
+ * command. No block range is given with the command.
+ */
+static void
+nand_protect_blocks(void)
+{
+	log_info("Protecting the entire NAND flash");
+
+	flash_write_cmd(NAND_LOCK);
+}
+
+/*
+ * Erase blkCnt consecutive NAND blocks starting at startBlkNum.
+ *
+ * Returns E_FAIL if the range extends beyond nand_info.num_blocks or as soon
+ * as one block fails the ready or status check; E_PASS otherwise.
+ */
+static uint32_t
+nand_erase_blocks(uint32_t startBlkNum, uint32_t blkCnt)
+{
+	uint32_t last_blk = startBlkNum + blkCnt - 1;
+	uint32_t done;
+
+	/* Reject ranges that run off the end of the device */
+	if (last_blk >= nand_info.num_blocks)
+		return E_FAIL;
+
+	uart_send_str("Erasing blocks ");
+	uart_send_hexnum(startBlkNum, 4);
+	uart_send_str(" to ");
+	uart_send_hexnum(last_blk, 4);
+	uart_send_lf();
+
+	done = 0;
+	while (done < blkCnt) {
+		/* Erase setup, row address only, then erase confirm */
+		flash_write_cmd(NAND_BERASEC1);
+		flash_write_row_addr_bytes(startBlkNum + done, 0);
+		flash_write_cmd(NAND_BERASEC2);
+
+		/* Wait until ready, then check the reported status */
+		if (nand_wait_for_ready(NAND_TIMEOUT) != E_PASS ||
+		    nand_wait_for_status(NAND_TIMEOUT) != E_PASS)
+			return E_FAIL;
+
+		done++;
+	}
+
+	return E_PASS;
+}
+
+/*
+ * Initialize the NAND interface and find the details of the NAND used.
+ *
+ * Records the flash base address, chip-select offset and bus width in
+ * nand_info, programs the matching AEMIF AxCR register, enables NAND mode
+ * for that chip select and resets the device.
+ *
+ * Returns E_FAIL if the device does not become ready after reset, otherwise
+ * the result of nand_get_details().
+ */
+int
+nand_init(void)
+{
+	uint32_t *cs_regs;
+	uint32_t bus16;
+
+	log_info("Initializing NAND flash:");
+
+#ifdef NAND_BYPASS_READ_PAGE_ECC_CHECK
+	log_info("  Bypassing ECC checks");
+#endif /* NAND_BYPASS_READ_PAGE_ECC_CHECK */
+
+	/* NAND flash base address comes from the __NANDFlash symbol */
+	nand_info.base_addr = (uint32_t) &__NANDFlash;
+
+	/* cs_offset can be 0 through 3, corresponding to CS2 through CS5 */
+	nand_info.cs_offset = (nand_info.base_addr >> 25) - 1;
+
+	/* BOOTCFG bit 5 gives the AEMIF CS2 bus width: 0 = 8 bit, 1 = 16 bit */
+	bus16 = (SYSTEM->BOOTCFG & 0x20) >> 5;
+	if (bus16)
+		nand_info.bus_width = BUS_16BIT;
+	else
+		nand_info.bus_width = BUS_8BIT;
+
+	/* The AxCR registers are indexed as an array starting at A1CR */
+	cs_regs = (uint32_t *) &(AEMIF->A1CR);
+	cs_regs[nand_info.cs_offset] = 0x3FFFFFFC | bus16;
+
+	/* NAND enable for CSx, followed by an ECC read (value discarded) */
+	AEMIF->NANDFCR |= (0x1 << (nand_info.cs_offset));
+	nand_read_ecc();
+
+	/* Reset the device and wait for it to come back */
+	flash_write_cmd(NAND_RESET);
+	if (nand_wait_for_ready(NAND_TIMEOUT) != E_PASS)
+		return E_FAIL;
+
+	return nand_get_details();
+}
+
+/*
+ * Write one page and read it back for comparison.
+ *
+ * Returns E_FAIL if the write fails; otherwise waits briefly and returns the
+ * result of nand_verify_page().
+ */
+static int
+nand_write_verify_page(int block, int page, const uint8_t *src)
+{
+	if (nand_write_page(block, page, src) != E_PASS)
+		return E_FAIL;
+
+	/* Short delay between programming and reading back */
+	waitloop(200);
+
+	return nand_verify_page(block, page, src);
+}
+
+/*
+ * Write an image to NAND, optionally preceded by its descriptor.
+ *
+ *   im_desc: image descriptor. block_num selects the first block to try;
+ *            page_num and size_in_pages are filled in here. Unless magic is
+ *            UBL_CMD_FLASH_DATA, the descriptor itself is written to page 0
+ *            and the data starts in page 1. block_num is advanced when a
+ *            block cannot be unprotected or erased, and each time the data
+ *            crosses a block boundary.
+ *   src:     image data.
+ *   size:    image size in bytes, rounded up to whole pages.
+ *
+ * Exactly size_in_pages pages of data are written, whether or not a header
+ * page precedes them.
+ *
+ * Returns E_PASS on success, E_FAIL if no usable block is left or a page
+ * fails to write or verify.
+ */
+int
+nand_write_prog(struct nand_image_descriptor_t *im_desc, const uint8_t *src,
+		size_t size)
+{
+	int num_blocks;
+	int max_block_num;
+	int page_num;
+	uint32_t pages_left;
+	uint32_t count_mask;
+
+	im_desc->page_num = 1; /* Always start data in page 1 */
+
+	/* Round the data size up to whole pages (counting loop, no division) */
+	im_desc->size_in_pages = 0;
+	while ((im_desc->size_in_pages * nand_info.bytes_per_page) < size)
+		im_desc->size_in_pages++;
+
+	/* Get total number of blocks needed, header page included */
+	num_blocks = 0;
+	while ((num_blocks * nand_info.pages_per_block) <
+	       (im_desc->size_in_pages + 1))
+		num_blocks++;
+
+	uart_send_str("Needed blocks: ");
+	uart_send_hexnum(num_blocks, 4);
+	uart_send_lf();
+	uart_send_str("Needed pages: ");
+	uart_send_hexnum(im_desc->size_in_pages, 4);
+	uart_send_lf();
+
+	/* Check whether writing UBL or APP (based on destination block) */
+	if (im_desc->block_num == START_UBL_BLOCK_NUM)
+		max_block_num = END_UBL_BLOCK_NUM;
+	else
+		max_block_num = nand_info.num_blocks - 1;
+
+NAND_WRITE_RETRY:
+	if (im_desc->block_num > max_block_num) {
+		log_fail("Block > last block");
+		return E_FAIL;
+	}
+
+	uart_send_str("Trying block ");
+	uart_send_hexnum(im_desc->block_num, 4);
+	uart_send_lf();
+
+	/* Unprotect all needed blocks of the Flash */
+	if (nand_unprotect_blocks(im_desc->block_num, num_blocks) != E_PASS) {
+		im_desc->block_num++;
+		log_info("Unprotect failed");
+		goto NAND_WRITE_RETRY;
+	}
+
+	/* Erase the block where the header goes and the data starts */
+	if (nand_erase_blocks(im_desc->block_num, num_blocks) != E_PASS) {
+		im_desc->block_num++;
+		log_info("Erase failed");
+		goto NAND_WRITE_RETRY;
+	}
+
+#ifdef NAND_DEBUG_WRITE_RAMP
+	{
+		int k;
+
+		/*
+		 * Useful for debugging NAND ECC and spare bytes errors.
+		 * NOTE(review): 'ptr' is not declared in this function, so
+		 * this debug block does not build as written.
+		 */
+		for (k = 0; k < 512; k++)
+			ptr[k] = 0xCAFE0000 | k;
+	}
+#endif
+
+	page_num = 0; /* Start in page 0. */
+
+	if (im_desc->magic != UBL_CMD_FLASH_DATA) {
+		/* Write the header to page 0. */
+		log_info("Writing header");
+
+		if (nand_write_verify_page(im_desc->block_num, page_num,
+					   (uint8_t *) im_desc) != E_PASS)
+			return E_FAIL;
+
+		/* Set starting page number for next data portion. */
+		page_num = 1;
+	}
+
+	/* The following assumes power of 2 page_cnt - *should* always be
+	 * valid. */
+	count_mask = nand_info.pages_per_block - 1;
+	log_info("Writing data");
+
+	/*
+	 * Count the data pages explicitly. Bounding the loop by page_num
+	 * writes one page too many when no header occupies page 0, reading
+	 * past the end of the source buffer.
+	 */
+	pages_left = im_desc->size_in_pages;
+	while (pages_left > 0) {
+		/* Write data on a per page basis */
+		if (nand_write_verify_page(im_desc->block_num,
+					   page_num & count_mask, src)
+		    != E_PASS)
+			return E_FAIL;
+
+		page_num++;
+		pages_left--;
+		src += nand_info.bytes_per_page;
+
+		/* Wrapped to page 0: continue in the next block */
+		if (!(page_num & count_mask))
+			im_desc->block_num++;
+	}
+
+	nand_protect_blocks();
+
+	return E_PASS;
+}
+
+/*
+ * Erase every block of the NAND flash.
+ *
+ * nand_unprotect_blocks() and nand_erase_blocks() take a block count, so the
+ * full nand_info.num_blocks is passed. Passing num_blocks - 1 would leave
+ * the last block locked and unerased.
+ *
+ * Returns E_FAIL if the blocks cannot be unprotected or an erase fails,
+ * E_PASS once all blocks are erased and the device is protected again.
+ */
+int
+nand_erase_all(void)
+{
+	/* Unprotect the NAND Flash */
+	if (nand_unprotect_blocks(0, nand_info.num_blocks) != E_PASS)
+		return E_FAIL;
+
+	/* Erase all the blocks */
+	if (nand_erase_blocks(0, nand_info.num_blocks) != E_PASS)
+		return E_FAIL;
+
+	/* Protect the device */
+	nand_protect_blocks();
+
+	return E_PASS;
+}
+
+/* Return the number of pages per block recorded in nand_info. */
+int
+nand_get_pages_per_block(void)
+{
+	int pages = nand_info.pages_per_block;
+
+	return pages;
+}
+
+/* Return the number of data bytes per page recorded in nand_info. */
+int
+nand_get_bytes_per_page(void)
+{
+	int bytes = nand_info.bytes_per_page;
+
+	return bytes;
+}
diff --git a/nand.h b/nand.h
new file mode 100644
index 0000000..e1d3165
--- /dev/null
+++ b/nand.h
@@ -0,0 +1,64 @@
+/*
+ * nand.h - NAND flash definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _NAND_H_
+#define _NAND_H_
+
+#include "common.h"
+#include "davinci.h"
+
+/* Define which blocks are valid for writing UBL and APP data */
+#define START_UBL_BLOCK_NUM     1
+#define END_UBL_BLOCK_NUM       5
+#define START_APP_BLOCK_NUM     6
+
+#define MAX_PAGE_SIZE	        (2048+64) /* Data bytes + spare area */
+
+/*
+ * NAND descriptor expected by RBL when it loads UBL image.
+ *
+ * nand_write_prog() writes this structure to page 0 of the image's first
+ * block (unless magic is UBL_CMD_FLASH_DATA), and nand_copy() reads it back
+ * to locate and load the application.
+ */
+struct nand_image_descriptor_t {
+	uint32_t magic;         /* Identifies the image type / valid header */
+	uint32_t entry_point;   /* Address handed back by nand_copy() */
+	uint32_t size_in_pages; /* Number of data pages in the image */
+	uint32_t block_num;     /* Block where the image data starts */
+	uint32_t page_num;      /* Page within block_num where data starts */
+	uint32_t load_address; /* Not used by RBL */
+};
+
+
+/* Initialize NAND interface and find the details of the NAND used */
+int nand_init(void);
+
+/* Unprotect, erase and re-protect the NAND flash */
+int nand_erase_all(void);
+
+/* Read one page from NAND into dest, checking it against the stored ECC */
+int nand_read_page(uint32_t block, uint32_t page, uint8_t *dest);
+
+/*
+ * Write size bytes from src to NAND, starting at im_desc->block_num. The
+ * descriptor is updated with the page count and the block actually used.
+ */
+int nand_write_prog(struct nand_image_descriptor_t *im_desc,
+		    const uint8_t *src, size_t size);
+
+/* Copy Application from NAND to RAM */
+int nand_copy(uint32_t *jump_entry_point);
+
+/* Number of pages per block of the detected device */
+int nand_get_pages_per_block(void);
+
+/* Number of data bytes per page of the detected device */
+int nand_get_bytes_per_page(void);
+
+#endif /* _NAND_H_ */
diff --git a/nandboot.c b/nandboot.c
new file mode 100644
index 0000000..d78db42
--- /dev/null
+++ b/nandboot.c
@@ -0,0 +1,125 @@
+/*
+ * nandboot.c - NAND boot mode functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "nand.h"
+#include "util.h"
+#include "uart.h"
+
+/* Page-sized buffer (data + spare) for the image header, in .ddrram */
+static uint8_t nand_header[MAX_PAGE_SIZE] __attribute__((section(".ddrram")));
+
+/*
+ * Find out where the application is and copy to RAM
+ *   jump_entry_point: Entry point for application we are decoding out of
+ *   flash
+ *
+ * Page 0 of blocks START_APP_BLOCK_NUM to START_APP_BLOCK_NUM + 10
+ * (inclusive) is searched for a descriptor with a valid magic number. The
+ * image it describes is then copied page by page to its load address.
+ *
+ * Each image is read at most twice. After the second failure the search
+ * resumes at the block following the failed header.
+ *
+ * Returns E_PASS with *jump_entry_point set when an image was loaded,
+ * E_FAIL when no loadable image was found.
+ */
+int
+nand_copy(uint32_t *jump_entry_point)
+{
+	uint32_t count, start_block;
+	uint32_t i;
+	uint32_t block, page;
+	uint32_t readError = E_FAIL;
+	int failedOnceAlready;
+	uint8_t *rxBuf; /* Destination of the image data in RAM */
+	struct nand_image_descriptor_t im_desc;
+	int last_header_block;
+
+	start_block = START_APP_BLOCK_NUM;
+	last_header_block = start_block + 10;
+
+NAND_startAgain:
+	/*
+	 * Every candidate image gets its own two read attempts. Without this
+	 * reset only the first image found would ever be retried.
+	 */
+	failedOnceAlready = false;
+
+	/* Look for a header in page 0 of each block up to the last one. */
+	for (count = start_block; count <= last_header_block; count++) {
+		if (nand_read_page(count, 0, nand_header) != E_PASS)
+			continue;
+
+		/*
+		 * Copy the descriptor out of the byte buffer instead of
+		 * reading it through a uint32_t pointer cast.
+		 */
+		memcpy((void *) &im_desc, nand_header, sizeof(im_desc));
+
+		/* Valid magic number found */
+		if ((im_desc.magic & MAGIC_NUMBER_MASK) == MAGIC_NUMBER_VALID) {
+			start_block = count;
+			break;
+		}
+	}
+
+	/* Never found valid header. */
+	if (count > last_header_block)
+		return E_FAIL;
+
+	uart_send_str("Image infos: Magic = ");
+	uart_send_hexnum(im_desc.magic, 8);
+	uart_send_str(", Entry = ");
+	uart_send_hexnum(im_desc.entry_point, 8);
+	uart_send_str(", Pages = ");
+	uart_send_hexnum(im_desc.size_in_pages, 8);
+	uart_send_str(", Load = ");
+	uart_send_hexnum(im_desc.load_address, 8);
+	uart_send_lf();
+
+	rxBuf = (uint8_t *) im_desc.load_address;
+
+NAND_retry:
+	/* initialize block and page number to be used for read */
+	block = im_desc.block_num;
+	page = im_desc.page_num;
+
+	/* Perform the actual copying of the application from NAND to RAM */
+	for (i = 0; i < im_desc.size_in_pages; i++) {
+		/* if page goes beyond max number of pages increment block
+		 * number and reset page number */
+		if (page >= nand_get_pages_per_block()) {
+			page = 0;
+			block++;
+		}
+
+		/* Copy the data */
+		readError =
+			nand_read_page(block, page++,
+				       &rxBuf[i * nand_get_bytes_per_page()]);
+
+		/*
+		 * We attempt to read the app data twice. If we fail twice then
+		 * we go look for a new application header in the NAND flash at
+		 * the next block.
+		 */
+		if (readError != E_PASS) {
+			if (failedOnceAlready) {
+				start_block++;
+				goto NAND_startAgain;
+			} else {
+				failedOnceAlready = true;
+				goto NAND_retry;
+			}
+		}
+	}
+
+	/* Application was read correctly, so set entrypoint */
+	*jump_entry_point = im_desc.entry_point;
+
+	return E_PASS;
+}
diff --git a/nor.c b/nor.c
new file mode 100644
index 0000000..6ff7936
--- /dev/null
+++ b/nor.c
@@ -0,0 +1,1187 @@
+/*
+ * nor.c - NOR flash functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "util.h"
+#include "uart.h"
+#include "nor.h"
+
+/* Bit masks */
+#define BIT0    0x00000001
+#define BIT1    0x00000002
+#define BIT2    0x00000004
+#define BIT3    0x00000008
+#define BIT4    0x00000010
+#define BIT5    0x00000020
+#define BIT6    0x00000040
+#define BIT7    0x00000080
+#define BIT8    0x00000100
+#define BIT9    0x00000200
+#define BIT10   0x00000400
+#define BIT11   0x00000800
+#define BIT12   0x00001000
+#define BIT13   0x00002000
+#define BIT14   0x00004000
+#define BIT15   0x00008000
+#define BIT16   0x00010000
+#define BIT17   0x00020000
+#define BIT18   0x00040000
+#define BIT19   0x00080000
+#define BIT20   0x00100000
+#define BIT21   0x00200000
+#define BIT22   0x00400000
+#define BIT23   0x00800000
+#define BIT24   0x01000000
+#define BIT25   0x02000000
+#define BIT26   0x04000000
+#define BIT27   0x08000000
+#define BIT28   0x10000000
+#define BIT29   0x20000000
+#define BIT30   0x40000000
+#define BIT31   0x80000000
+
+#define BUS_8BIT    0x01
+#define BUS_16BIT   0x02
+#define BUS_32BIT   0x04
+
+/**************** DEFINES for AMD Basic Command Set **************/
+#define AMD_CMD0                    0xAA        // AMD CMD PREFIX 0
+#define AMD_CMD1                    0x55        // AMD CMD PREFIX 1
+#define AMD_CMD0_ADDR               0x555       // AMD CMD0 Offset 
+#define AMD_CMD1_ADDR               0x2AA       // AMD CMD1 Offset 
+#define AMD_CMD2_ADDR       	    0x555       // AMD CMD2 Offset 
+#define AMD_ID_CMD                  0x90        // AMD ID CMD
+#define AMD_MANFID_ADDR             0x00        // Manufacturer ID offset
+#define AMD_DEVID_ADDR0             0x01        // First device ID offset
+#define AMD_DEVID_ADDR1             0x0E        // Offset for 2nd byte of 3 byte ID 
+#define AMD_DEVID_ADDR2             0x0F        // Offset for 3rd byte of 3 byte ID 
+#define AMD_ID_MULTI                0x7E        // First-byte ID value for 3-byte ID
+#define AMD_RESET                   0xF0        // AMD Device Reset Command
+#define AMD_BLK_ERASE_SETUP_CMD     0x80        // Block erase setup
+#define AMD_BLK_ERASE_CMD	        0x30        // Block erase confirm
+#define AMD_BLK_ERASE_DONE	        0xFFFF      // Block erase check value
+#define AMD_PROG_CMD                0xA0        // AMD simple Write command
+#define AMD_WRT_BUF_LOAD_CMD        0x25        // AMD write buffer load command
+#define AMD_WRT_BUF_CONF_CMD        0x29        // AMD write buffer confirm command
+
+/**************** DEFINES for Intel Basic Command Set **************/
+#define INTEL_ID_CMD            0x90        // Intel ID CMD
+#define INTEL_MANFID_ADDR       0x00        // Manufacturer ID offset
+#define INTEL_DEVID_ADDR        0x01        // Device ID offset
+#define INTEL_RESET             0xFF        // Intel Device Reset Command
+#define INTEL_ERASE_CMD0        0x20        // Intel Erase command
+#define INTEL_ERASE_CMD1        0xD0        // Intel Erase command
+#define INTEL_WRITE_CMD         0x40        // Intel simple write command
+#define INTEL_WRT_BUF_LOAD_CMD  0xE8        // Intel write buffer load command
+#define INTEL_WRT_BUF_CONF_CMD  0xD0        // Intel write buffer confirm command
+#define INTEL_LOCK_CMD0         0x60        // Intel lock mode command
+#define INTEL_LOCK_BLOCK_CMD    0x01        // Intel lock command
+#define INTEL_UNLOCK_BLOCK_CMD  0xD0        // Intel unlock command
+#define INTEL_CLEARSTATUS_CMD   0x50        // Intel clear status command
+
+
+/**************** DEFINES for CFI Commands and Table **************/
+
+// CFI Entry and Exit commands
+#define CFI_QRY_CMD             0x98U
+#define CFI_EXIT_CMD            0xF0U
+
+// CFI address locations
+#define CFI_QRY_CMD_ADDR        0x55U
+
+// CFI Table Offsets in Bytes
+#define CFI_Q                   0x10
+#define CFI_R                   0x11
+#define CFI_Y                   0x12
+#define CFI_CMDSET              0x13
+#define CFI_CMDSETADDR          0x15
+#define CFI_ALTCMDSET           0x17
+#define CFI_ALTCMDSETADDR       0x19
+#define CFI_MINVCC              0x1B
+#define CFI_MAXVCC              0x1C
+#define CFI_MINVPP              0x1D
+#define CFI_MAXVPP              0x1E
+#define CFI_TYPBYTEPGMTIME      0x1F
+#define CFI_TYPBUFFERPGMTIME    0x20
+#define CFI_TYPBLOCKERASETIME   0x21
+#define CFI_TYPCHIPERASETIME    0x22
+#define CFI_MAXBYTEPGMTIME      0x23
+#define CFI_MAXBUFFERPGMTIME    0x24
+#define CFI_MAXBLOCKERASETIME   0x25
+#define CFI_MAXCHIPERASETIME    0x26
+#define CFI_DEVICESIZE          0x27
+#define CFI_INTERFACE           0x28
+#define CFI_WRITESIZE           0x2A
+#define CFI_NUMBLKREGIONS       0x2C
+#define CFI_BLKREGIONS          0x2D
+#define CFI_BLKREGIONSIZE       0x04
+
+// Maximum number of block regions supported
+#define CFI_MAXREGIONS          0x06
+
+/*********************** Enumerated types *************************/
+// Supported Flash Manufacturers.
+// Each value is the manufacturer ID code; NOR_Init() switches on
+// gNorInfo.manfID to print the matching name.
+enum flash_manufacturer_id_t {
+	UNKNOWN_ID = 0x00,
+	AMD = 0x01,
+	FUJITSU = 0x04,
+	INTEL = 0x89,
+	MICRON = 0x2C,
+	SAMSUNG = 0xEC,
+	SHARP = 0xB0
+};
+
+// Short alias for the manufacturer ID enumeration
+typedef enum flash_manufacturer_id_t MANFID;
+
+// Supported CFI command sets.
+// NOR_Init() uses the AMD and Intel values to select the erase/write/ID
+// handlers; every other value falls through to the "Unsupported" handlers.
+enum FlashCommandSet {
+	UNKNOWN_CMDSET = 0x0000,
+	INTEL_EXT_CMDSET = 0x0001,
+	AMD_BASIC_CMDSET = 0x0002,
+	INTEL_BASIC_CMDSET = 0x0003,
+	AMD_EXT_CMDSET = 0x0004,
+	MITSU_BASIC_CMDSET = 0x0100,
+	MITSU_EXT_CMDSET = 0x0101
+    
+};
+// Short alias for the command set enumeration
+typedef enum FlashCommandSet CMDSET;
+
+/*************************** Structs *********************************/
+// Struct to hold discovered flash parameters.
+// A single file-scope instance (gNorInfo) is filled in by NOR_Init() and
+// QueryCFI() and read by the bus-width agnostic helpers below.
+typedef struct _NOR_MEDIA_STRUCT_ {
+	uint32_t       flashBase;                          // 32-bit address of flash start
+	uint8_t        busWidth;                           // Bus width: BUS_8BIT or BUS_16BIT
+	uint8_t        chipOperatingWidth;                 // The operating width of each chip
+	uint8_t        maxTotalWidth;                      // Maximum combined width of all chips - scales offsets in flash_make_addr()
+	uint32_t       flashSize;                          // Size of NOR flash regions in bytes (numberDevices * size of one device)
+	uint32_t       bufferSize;                         // Size of write buffer
+	CMDSET       commandSet;                         // Command set id (see CFI documentation)
+	uint8_t        numberDevices;                      // Number of devices used in parallel
+	uint8_t        numberRegions;                      // Number of contiguous regions of same block size
+	uint32_t       numberBlocks[CFI_MAXREGIONS];    // Number of blocks in a region
+	uint32_t       blockSize[CFI_MAXREGIONS];       // Size of the blocks in a region
+	enum flash_manufacturer_id_t       manfID;                             // Manufacturer's ID
+	uint16_t       devID1;                             // Device ID
+	uint16_t       devID2;                             // Used for AMD 3-byte ID devices
+} NOR_INFO, *PNOR_INFO;
+
+// One flash data word, viewed as 8, 16 or 32 bits. The helpers below pick
+// the member that matches gNorInfo.busWidth.
+typedef union {
+	uint8_t c;
+	uint16_t w;
+	uint32_t l;
+} FLASHData;
+
+// One flash address, viewed as a pointer to 8, 16 or 32 bit volatile data.
+typedef union {
+	volatile uint8_t *cp;
+	volatile uint16_t *wp;
+	volatile uint32_t *lp;
+} FLASHPtr;
+
+// External and global static variables
+// __NORFlash: only its address is used, as the NOR flash base (see NOR_Init)
+extern uint32_t __NORFlash;
+
+// Parameters of the NOR flash discovered at init time
+static volatile NOR_INFO gNorInfo;
+
+// ----------------- Bus Width Agnostic commands -------------------
+// Turn a device offset into a bus address: the offset is scaled by
+// gNorInfo.maxTotalWidth and added to blkAddr.
+volatile uint8_t *flash_make_addr (uint32_t blkAddr, uint32_t offset)
+{
+	uint32_t byte_offset = offset * gNorInfo.maxTotalWidth;
+
+	return (volatile uint8_t *) (blkAddr + byte_offset);
+}
+
+// Build a bus-wide command word in cmdbuf. gNorInfo.busWidth bytes are
+// written: a byte receives cmd when its countdown index is a multiple of
+// gNorInfo.chipOperatingWidth, and 0x00 otherwise.
+void flash_make_cmd (uint8_t cmd, void *cmdbuf)
+{
+	uint8_t *out = (uint8_t *) cmdbuf;
+	int32_t remaining = gNorInfo.busWidth;
+
+	while (remaining > 0)
+	{
+		if (remaining & (gNorInfo.chipOperatingWidth - 1))
+			*out = 0x00;
+		else
+			*out = cmd;
+		out++;
+		remaining--;
+	}
+}
+
+// Write command cmd to the flash at blkAddr + scaled offset, using an
+// access as wide as the bus. Other bus widths write nothing.
+void flash_write_cmd (uint32_t blkAddr, uint32_t offset, uint8_t cmd)
+{
+	volatile FLASHPtr target;
+	FLASHData word;
+	uint8_t width;
+
+	target.cp = flash_make_addr (blkAddr, offset);
+	flash_make_cmd (cmd, &word);
+
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		*target.cp = word.c;
+	else if (width == BUS_16BIT)
+		*target.wp = word.w;
+}
+
+// Write the low 8 or 16 bits of data (depending on the bus width) to the
+// absolute address given. Other bus widths write nothing.
+void flash_write_data(uint32_t address, uint32_t data)
+{
+	volatile FLASHPtr target;
+	FLASHData word;
+	uint8_t width;
+
+	word.l = data;
+	target.cp = (volatile uint8_t *) address;
+
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		*target.cp = word.c;
+	else if (width == BUS_16BIT)
+		*target.wp = word.w;
+}
+
+// Copy numBytes from data to the flash address stored in *address, one
+// bus-wide unit at a time. On return *address holds the address of the
+// last unit written (used for AMD verification).
+void flash_write_databuffer(uint32_t* address, void* data, uint32_t numBytes)
+{
+	volatile FLASHPtr dst, from;
+	volatile uint8_t *limit;
+	uint8_t width;
+
+	from.cp = (volatile uint8_t *) data;
+	dst.cp = (volatile uint8_t *) *address;
+	limit = (volatile uint8_t *) ((*address) + numBytes);
+
+	while (dst.cp < limit)
+	{
+		width = gNorInfo.busWidth;
+		if (width == BUS_8BIT)
+			*dst.cp++ = *from.cp++;
+		else if (width == BUS_16BIT)
+			*dst.wp++ = *from.wp++;
+	}
+
+	// Report where the last unit went
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		*address = (uint32_t) (limit - 1);
+	else if (width == BUS_16BIT)
+		*address = (uint32_t) (limit - 2);
+}
+
+// Compare numBytes of flash starting at address with the buffer data, one
+// bus-wide unit at a time. Returns E_FAIL at the first difference, E_PASS
+// when everything matches.
+uint32_t flash_verify_databuffer(uint32_t address, void* data, uint32_t numBytes)
+{
+	volatile FLASHPtr flash, ref;
+	volatile uint8_t *limit;
+	uint8_t width;
+
+	ref.cp = (volatile uint8_t *) data;
+	flash.cp = (volatile uint8_t *) address;
+	limit = (volatile uint8_t *) (address + numBytes);
+
+	while (flash.cp < limit)
+	{
+		width = gNorInfo.busWidth;
+		if (width == BUS_8BIT)
+		{
+			if ((*flash.cp++) != (*ref.cp++))
+				return E_FAIL;
+		}
+		else if (width == BUS_16BIT)
+		{
+			if ((*flash.wp++) != (*ref.wp++))
+				return E_FAIL;
+		}
+	}
+
+	return E_PASS;
+}
+
+// Read one bus-wide unit from address + scaled offset. The value is
+// returned in the low bits of a zero-initialized 32-bit word; other bus
+// widths return 0.
+uint32_t flash_read_data(uint32_t address, uint32_t offset)
+{
+	volatile FLASHPtr from;
+	FLASHData word;
+	uint8_t width;
+
+	word.l = 0x00000000;
+	from.cp = flash_make_addr(address, offset);
+
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		word.c = *from.cp;
+	else if (width == BUS_16BIT)
+		word.w = *from.wp;
+
+	return word.l;
+}
+
+// Read up to sizeof(FLASHData) consecutive CFI table bytes starting at
+// offset and return them packed into a FLASHData, first byte lowest in
+// memory.
+//
+// The result is zeroed first, so bytes beyond numBytes are 0 rather than
+// stack garbage. numBytes is clamped to the size of the union, so a larger
+// request cannot write past it.
+FLASHData flash_read_CFI_bytes (uint32_t blkAddr, uint32_t offset, uint8_t numBytes)
+{
+	FLASHData readword;
+	uint8_t *pReadword = (uint8_t *) &readword;
+	uint8_t i;
+
+	readword.l = 0x00000000;
+
+	if (numBytes > sizeof(readword))
+		numBytes = sizeof(readword);
+
+	for (i = 0; i < numBytes; i++)
+	{
+		// Each table entry is fetched as a single byte
+		pReadword[i] = *(flash_make_addr (blkAddr, offset + i));
+	}
+
+	return readword;
+}
+
+// Compare the bus-wide unit at blkAddr + scaled offset with the low bits
+// of val. Returns FALSE for bus widths other than 8 or 16 bits.
+Bool flash_data_isequal (uint32_t blkAddr, uint32_t offset, uint32_t val)
+{
+	FLASHData expected, actual;
+	Bool same = FALSE;
+	uint8_t width;
+
+	expected.l = val;
+	actual.l = flash_read_data(blkAddr, offset);
+
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		same = (expected.c == actual.c);
+	else if (width == BUS_16BIT)
+		same = (expected.w == actual.w);
+
+	return same;
+}
+
+// Check whether the flash at blkAddr + scaled offset holds the byte val,
+// expanded to bus width the same way commands are (flash_make_cmd).
+// Returns TRUE for bus widths other than 8 or 16 bits.
+Bool flash_CFI_isequal (uint32_t blkAddr, uint32_t offset, uint8_t val)
+{
+	volatile FLASHPtr from;
+	FLASHData expected;
+	Bool same = TRUE;
+	uint8_t width;
+
+	from.cp = flash_make_addr (blkAddr, offset);
+	flash_make_cmd (val, &expected);
+
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		same = (expected.c == *from.cp);
+	else if (width == BUS_16BIT)
+		same = (expected.w == *from.wp);
+
+	return same;
+}
+
+// Check whether every bit of mask (expanded to bus width with
+// flash_make_cmd) is set in the flash unit at blkAddr + scaled offset.
+// Returns TRUE for bus widths other than 8 or 16 bits.
+Bool flash_issetall (uint32_t blkAddr, uint32_t offset, uint8_t mask)
+{
+	volatile FLASHPtr from;
+	FLASHData wide_mask;
+	Bool all_set = TRUE;
+	uint8_t width;
+
+	wide_mask.l = 0x00000000;
+
+	from.cp = flash_make_addr (blkAddr, offset);
+	flash_make_cmd (mask, &wide_mask);
+
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		all_set = ((wide_mask.c & *from.cp) == wide_mask.c);
+	else if (width == BUS_16BIT)
+		all_set = ((wide_mask.w & *from.wp) == wide_mask.w);
+
+	return all_set;
+}
+
+// Check whether at least one bit of mask (expanded to bus width with
+// flash_make_cmd) is set in the flash unit at blkAddr + scaled offset.
+// Returns TRUE for bus widths other than 8 or 16 bits.
+//
+// The masked value is compared against 0 before being stored. Assigning the
+// raw 16-bit result to a Bool could drop set bits in the upper byte if Bool
+// is narrower than 16 bits, and callers would otherwise receive the mask
+// bits rather than a truth value.
+Bool flash_issetsome (uint32_t blkAddr, uint32_t offset, uint8_t mask)
+{
+	volatile FLASHPtr addr;
+	FLASHData maskword;
+	Bool retval = TRUE;
+
+	maskword.l = 0x00000000;
+
+	addr.cp = flash_make_addr (blkAddr, offset);
+	flash_make_cmd ( mask, &maskword);
+	switch (gNorInfo.busWidth)
+	{
+	case BUS_8BIT:
+		retval = ((maskword.c & *addr.cp) != 0);
+		break;
+	case BUS_16BIT:
+		retval = ((maskword.w & *addr.wp) != 0);
+		break;
+	}
+	return retval;
+}
+
+// Initialize the AEMIF subsystem and settings, probe the NOR flash with a
+// CFI query and bind the erase/write/ID handlers for its command set.
+// Returns E_FAIL when the CFI query or the ID read fails, E_PASS otherwise.
+uint32_t NOR_Init()
+{
+	const char *manf_name;
+	CMDSET cmdset;
+	uint8_t width = ( ( (SYSTEM->BOOTCFG) >> 5) & 0x1 );
+	uint32_t acr = 0x3FFFFFFC | width;
+
+	// Select ASYNC EMIF Address Lines
+	SYSTEM->PINMUX[0] = 0xC1F;
+
+	// Program Asynchronous Wait Cycles Configuration Control Register
+#warning "To check: AEMIF->AWCCR |= 0x0;"
+	AEMIF->AWCCR |= 0x0;
+
+	// Program the four asynchronous bank registers with the same value.
+	// Bit 0 (asyncSize) selects a 16-bit bus when set.
+	AEMIF->A1CR = acr;
+	AEMIF->A2CR = acr;
+	AEMIF->A3CR = acr;
+	AEMIF->A4CR = acr;
+
+	// Init the FlashInfo structure
+	gNorInfo.flashBase = (uint32_t) &(__NORFlash);
+
+	// Set width to 8 or 16
+	if (width)
+		gNorInfo.busWidth = BUS_16BIT;
+	else
+		gNorInfo.busWidth = BUS_8BIT;
+
+	// Perform CFI Query
+	if (QueryCFI(gNorInfo.flashBase) != E_PASS)
+	{
+		log_info("CFI query failed.");
+		return E_FAIL;
+	}
+
+	// Needed to check for AMD flash on DVEVM (rev. D or earlier), whose
+	// top address line is not connected: if the query also succeeds
+	// halfway through the flash, halve the size and block count.
+	if (gNorInfo.numberRegions == 1 &&
+	    QueryCFI( gNorInfo.flashBase+(gNorInfo.flashSize>>1) ) == E_PASS)
+	{
+		gNorInfo.flashSize >>= 1;
+		gNorInfo.numberBlocks[0] >>= 1;
+	}
+
+	log_info("NOR Initialization:");
+
+	// Setup function pointers according to the command set
+	uart_send_str("  Command Set: ");
+	cmdset = gNorInfo.commandSet;
+	if (cmdset == AMD_BASIC_CMDSET || cmdset == AMD_EXT_CMDSET)
+	{
+		Flash_Erase          = &AMD_Erase;
+		Flash_BufferWrite    = &AMD_BufferWrite;
+		Flash_Write          = &AMD_Write;
+		Flash_ID             = &AMD_ID;
+		log_info("AMD");
+	}
+	else if (cmdset == INTEL_BASIC_CMDSET || cmdset == INTEL_EXT_CMDSET)
+	{
+		Flash_Erase          = &Intel_Erase;
+		Flash_BufferWrite    = &Intel_BufferWrite;
+		Flash_Write          = &Intel_Write;
+		Flash_ID             = &Intel_ID;
+		log_info("Intel");
+	}
+	else
+	{
+		Flash_Write          = &Unsupported_Write;
+		Flash_BufferWrite    = &Unsupported_BufferWrite;
+		Flash_Erase          = &Unsupported_Erase;
+		Flash_ID             = &Unsupported_ID;
+		log_info("Unknown");
+	}
+
+	if ( (*Flash_ID)(gNorInfo.flashBase) != E_PASS)
+	{
+		log_info("NOR ID failed.");
+		return E_FAIL;
+	}
+
+	// Translate the manufacturer ID into a printable name
+	uart_send_str("  Manufacturer: ");
+	switch(gNorInfo.manfID)
+	{
+	case AMD:
+		manf_name = "AMD";
+		break;
+	case FUJITSU:
+		manf_name = "FUJITSU";
+		break;
+	case INTEL:
+		manf_name = "INTEL";
+		break;
+	case MICRON:
+		manf_name = "MICRON";
+		break;
+	case SAMSUNG:
+		manf_name = "SAMSUNG";
+		break;
+	case SHARP:
+		manf_name = "SHARP";
+		break;
+	default:
+		manf_name = "Unknown";
+		break;
+	}
+	uart_send_str(manf_name);
+	uart_send_lf();
+
+	uart_send_str("  Size (in bytes): ");
+	uart_send_hexnum(gNorInfo.flashSize, 8);
+	uart_send_lf();
+
+	return E_PASS;
+}
+
+// Probe baseAddress for a CFI table; on a "QRY" match store the geometry in gNorInfo and return E_PASS, else E_FAIL
+uint32_t QueryCFI( uint32_t baseAddress )
+{
+	int32_t i;
+	uint32_t blkVal;
+
+	// Six possible NOR Flash Configurations of DM644x
+	//  1) Bus in x8 mode, x8 only device
+	//  2) Bus in x8 mode, single x8/x16 flash operating in x8 mode
+	//  3) Bus in x16 mode, single x8/x16 or x16-only flash operating in x16 mode
+	//  4) Bus in x16 mode, two x8 flash operating in parallel.
+	//  5) Bus in x16 mode, two x8/x16 flash, each in x8 mode, operating in parallel
+	//  6) Bus in x16 mode, single x16/x32 flash operating in x16 mode
+	// Each chip-width / total-width candidate is tried in turn until the query signature is read back
+	for (gNorInfo.chipOperatingWidth = BUS_8BIT; gNorInfo.chipOperatingWidth <= gNorInfo.busWidth;  gNorInfo.chipOperatingWidth <<= 1)
+	{
+		for (gNorInfo.maxTotalWidth = gNorInfo.busWidth; gNorInfo.maxTotalWidth <= (gNorInfo.busWidth*2); gNorInfo.maxTotalWidth <<= 1)
+		{
+			// Number of chips needed to fill the bus at this chip width
+			gNorInfo.numberDevices = 0;
+			while ( gNorInfo.numberDevices * gNorInfo.chipOperatingWidth < gNorInfo.busWidth)
+				gNorInfo.numberDevices++;
+
+			// Enter the CFI Query mode
+			flash_write_cmd (baseAddress, 0, CFI_EXIT_CMD);
+			flash_write_cmd (baseAddress, CFI_QRY_CMD_ADDR, CFI_QRY_CMD);
+
+			// Check for Query QRY values
+			if ( flash_CFI_isequal ( baseAddress, CFI_Q, 'Q') &&
+			     flash_CFI_isequal ( baseAddress, CFI_R, 'R') &&
+			     flash_CFI_isequal ( baseAddress, CFI_Y, 'Y') )
+			{
+				gNorInfo.commandSet = (CMDSET) (flash_read_CFI_bytes(baseAddress,CFI_CMDSET,2).w);
+				// '*' binds tighter than '<<': shift per chip first, then scale by the chip count
+				gNorInfo.flashSize = (0x1u << flash_read_CFI_bytes(baseAddress,CFI_DEVICESIZE,1).c) * gNorInfo.numberDevices;
+				gNorInfo.numberRegions = flash_read_CFI_bytes(baseAddress,CFI_NUMBLKREGIONS,1).c;
+				gNorInfo.bufferSize = (0x1u << flash_read_CFI_bytes(baseAddress,CFI_WRITESIZE,2).w) * gNorInfo.numberDevices;
+				// Get info on sector sizes in each erase region of device
+				for (i = 0;i < gNorInfo.numberRegions; i++)
+				{
+					blkVal = flash_read_CFI_bytes(baseAddress,(CFI_BLKREGIONS+i*CFI_BLKREGIONSIZE),4).l;
+					gNorInfo.numberBlocks[i] = (blkVal&0x0000FFFF) + 1;
+					gNorInfo.blockSize[i]    = ((blkVal&0xFFFF0000) ? ( ((blkVal>>16)&0xFFFF) * 256) : 128) * gNorInfo.numberDevices;
+				}
+
+				// Exit CFI mode
+				flash_write_cmd (baseAddress, 0, CFI_EXIT_CMD);
+
+				return E_PASS;
+			}
+		}
+	}
+
+	flash_write_cmd (baseAddress, 0, CFI_EXIT_CMD);
+	return E_FAIL;
+}
+
+
+// -------------------------------------------------------------------------
+// Manufacturer Specific Commands
+// -------------------------------------------------------------------------
+
+// ------------------------  Default Empty  ---------------------------
+uint32_t Unsupported_Write( uint32_t address, volatile uint32_t data)
+{
+	return E_FAIL;	// installed as Flash_Write for an unknown command set: always refuses, arguments unused
+}
+uint32_t Unsupported_BufferWrite(uint32_t address, volatile uint8_t data[], uint32_t length )
+{
+	return E_FAIL;	// installed as Flash_BufferWrite for an unknown command set: always refuses
+}
+uint32_t Unsupported_Erase(uint32_t address)
+{
+	return E_FAIL;	// installed as Flash_Erase for an unknown command set: always refuses
+}
+
+uint32_t Unsupported_ID(uint32_t address)
+{
+	return E_FAIL;	// installed as Flash_ID for an unknown command set: identification always fails
+}
+
+
+// -------------------- Begin of Intel specific commands -----------------------
+
+// Read the manufacturer and device IDs of an Intel command-set flash into gNorInfo; always returns E_PASS
+uint32_t Intel_ID( uint32_t baseAddress )
+{
+	// Intel Exit back to read array mode
+	Intel_Soft_Reset_Flash();
+
+	// Write ID command
+	flash_write_cmd(baseAddress, 0, INTEL_ID_CMD);
+
+	//Read Manufacturer's ID
+	gNorInfo.manfID = (enum flash_manufacturer_id_t) flash_read_data(baseAddress, INTEL_MANFID_ADDR);
+
+	// Read Device ID; it must not pass through the manufacturer enum, which may be narrower than 16 bits
+	gNorInfo.devID1 = (uint16_t) flash_read_data(baseAddress, INTEL_DEVID_ADDR);
+	gNorInfo.devID2 = 0x0000;
+
+	// Intel Exit back to read array mode
+	Intel_Soft_Reset_Flash();
+
+	return E_PASS;
+}
+
+// Put the Intel flash at gNorInfo.flashBase back into read array mode
+void Intel_Soft_Reset_Flash()
+{
+	// INTEL_RESET is written at offset 0 of the flash base, whatever block was last addressed
+	flash_write_cmd(gNorInfo.flashBase,0,INTEL_RESET);
+}
+
+// Clear the status register of the Intel flash at gNorInfo.flashBase
+void Intel_Clear_Status()
+{
+	// INTEL_CLEARSTATUS_CMD is written at offset 0 of the flash base
+	flash_write_cmd(gNorInfo.flashBase,0,INTEL_CLEARSTATUS_CMD);
+}
+
+// Remove block write protection from the block at blkAddr
+uint32_t Intel_Clear_Lock(volatile uint32_t blkAddr)
+{
+	// Two writes addressed to the block itself:
+	// Write the Clear Lock Command (INTEL_LOCK_CMD0 first ...)
+	flash_write_cmd(blkAddr,0,INTEL_LOCK_CMD0);
+	// ... then INTEL_UNLOCK_BLOCK_CMD
+	flash_write_cmd(blkAddr,0,INTEL_UNLOCK_BLOCK_CMD);
+
+	// Check Status; the result (E_PASS or E_FAIL) is handed straight back to the caller
+	return Intel_Lock_Status_Check();
+}
+
+// Write-protect the block at blkAddr
+uint32_t Intel_Set_Lock(volatile uint32_t blkAddr)
+{
+	// Write the Set Lock Command (INTEL_LOCK_CMD0 first ...)
+	flash_write_cmd(blkAddr,0,INTEL_LOCK_CMD0);
+	// ... then INTEL_LOCK_BLOCK_CMD, both addressed to the block itself
+	flash_write_cmd(blkAddr,0,INTEL_LOCK_BLOCK_CMD);
+
+	// Check Status; the result (E_PASS or E_FAIL) is handed straight back to the caller
+	return Intel_Lock_Status_Check();
+}
+
+void Intel_Wait_For_Status_Complete()
+{
+	while ( !flash_issetall(gNorInfo.flashBase, 0, BIT7) );	// busy-wait, with no timeout, until BIT7 is reported at the flash base
+}
+
+uint32_t Intel_Lock_Status_Check()
+{
+	uint32_t outcome;
+
+	/*
+	 * Finish a lock or unlock command: wait for the operation to
+	 * complete, sample the error bits, then clear the status and go
+	 * back to read array mode.
+	 *
+	 * Returns E_FAIL when BIT5 or BIT3 is flagged at the flash base,
+	 * E_PASS otherwise.
+	 */
+	Intel_Wait_For_Status_Complete();
+
+	/*
+	 * The error bits are sampled before the status is cleared below.
+	 * Per the diagnostics this routine once printed, BIT5 marks a
+	 * clear-lock error (a command sequence error when BIT4 is set as
+	 * well) and BIT3 a voltage range error; no message is printed now.
+	 */
+	if (flash_issetsome(gNorInfo.flashBase, 0, (BIT5 | BIT3)))
+		outcome = E_FAIL;
+	else
+		outcome = E_PASS;
+
+	/*
+	 * The clean-up below runs whatever the outcome was.
+	 */
+	// Clear status
+	Intel_Clear_Status();
+
+	// Put chip back into read array mode.
+	Intel_Soft_Reset_Flash();
+
+	return outcome;
+}
+
+/*
+ * Unlock and erase the block at blkAddr (Intel command set).  Returns the
+ * unlock status, overridden by E_FAIL when BIT5 is flagged after the erase.
+ */
+uint32_t Intel_Erase(volatile uint32_t blkAddr)
+{
+	// Clear the lock bits; whatever that reports is carried into the result
+	uint32_t status = E_PASS | Intel_Clear_Lock(blkAddr);
+
+	// Two-command erase sequence addressed to the block
+	flash_write_cmd(blkAddr, 0, INTEL_ERASE_CMD0);
+	flash_write_cmd(blkAddr, 0, INTEL_ERASE_CMD1);
+
+	// Wait until Erase operation complete
+	Intel_Wait_For_Status_Complete();
+
+	// Verify successful erase
+	status = flash_issetsome(gNorInfo.flashBase, 0, BIT5) ? E_FAIL : status;
+
+	// Put back into Read Array mode.
+	Intel_Soft_Reset_Flash();
+
+	return status;
+}
+
+/*
+ * Program the value data at address (Intel command set).  Returns E_FAIL
+ * when BIT4 or BIT3 is flagged once the chip reports completion, E_PASS
+ * otherwise.  The block is not re-locked afterwards.
+ */
+uint32_t Intel_Write( uint32_t address, volatile uint32_t data )
+{
+	uint32_t result;
+
+	// Program command followed by the data, both at the target address
+	flash_write_cmd(address, 0, INTEL_WRITE_CMD);
+	flash_write_data(address, data);
+
+	// Wait until Write operation complete
+	Intel_Wait_For_Status_Complete();
+
+	// Sample the error bits before the reset below
+	if (flash_issetsome(gNorInfo.flashBase, 0, (BIT4|BIT3)))
+		result = E_FAIL;
+	else
+		result = E_PASS;
+
+	// Put back into Read Array mode.
+	Intel_Soft_Reset_Flash();
+
+	return result;
+}
+
+/*
+ * Program numBytes bytes from data[] at address through the write buffer
+ * of an Intel command-set flash.
+ *
+ * Returns E_TIMEOUT when the buffer-load command is not accepted within
+ * 0x10000 attempts, E_FAIL when BIT4 is flagged after programming, and
+ * E_PASS otherwise.
+ */
+uint32_t Intel_BufferWrite(uint32_t address, volatile uint8_t data[], uint32_t numBytes )
+{
+	const uint32_t firstAddress = address;
+	uint32_t attempts = 0;
+	uint32_t wordShift = 0;
+	uint32_t result = E_PASS;
+
+	// Send Write_Buff_Load command until BIT7 is reported or the attempt
+	// budget is used up
+	do {
+		flash_write_cmd(address, 0, INTEL_WRT_BUF_LOAD_CMD);
+		attempts++;
+	} while ((!flash_issetall(gNorInfo.flashBase, 0, BIT7)) && (attempts < 0x00010000));
+
+	// Using up the budget counts as a timeout even when the last attempt
+	// was accepted; nothing else is sent to the chip in that case
+	if (attempts >= 0x10000)
+		return E_TIMEOUT;
+
+	// Establish correct shift value: log2 of the bus width
+	while ((gNorInfo.busWidth >> wordShift) > 1)
+		wordShift++;
+
+	// Write Length (either numBytes or numBytes/2), minus one
+	flash_write_cmd(firstAddress, 0, (numBytes >> wordShift) - 1);
+
+	// Write buffer data; address is handed over by pointer, so every later
+	// command uses the saved firstAddress instead
+	flash_write_databuffer(&address, (void*)data, numBytes);
+
+	// Send write buffer confirm command and wait for completion
+	flash_write_cmd(firstAddress, 0, INTEL_WRT_BUF_CONF_CMD);
+	Intel_Wait_For_Status_Complete();
+
+	// Verify program was successful
+	if (flash_issetsome(gNorInfo.flashBase, 0, BIT4))
+	{
+#ifdef NOR_DEBUG
+		log_info("Write Buffer Op Failed.");
+#endif
+		result = E_FAIL;
+	}
+
+	// Put back into Read Array mode.
+	Intel_Soft_Reset_Flash();
+
+	return result;
+}
+// -------------------- End of Intel specific commands ----------------------
+
+
+// -------------------- Begin of AMD specific commands -----------------------
+/*
+ * Identify an AMD command-set flash: store the manufacturer ID and the
+ * device ID word(s) in gNorInfo.  Always returns E_PASS.
+ */
+uint32_t AMD_ID( uint32_t baseAddress )
+{
+	// Start from read array mode
+	AMD_Soft_Reset_Flash();
+
+	// Prefix commands followed by the ID command
+	AMD_Prefix_Commands();
+	flash_write_cmd(baseAddress, AMD_CMD2_ADDR, AMD_ID_CMD);
+
+	gNorInfo.manfID = (enum flash_manufacturer_id_t) flash_read_data(baseAddress, AMD_MANFID_ADDR);
+	gNorInfo.devID1 = (uint16_t) flash_read_data(baseAddress, AMD_DEVID_ADDR0);
+
+	// A low byte equal to AMD_ID_MULTI means additional ID bytes are read;
+	// otherwise the second ID word is left at zero
+	gNorInfo.devID2 = ((gNorInfo.devID1 & 0xFF) == AMD_ID_MULTI)
+		? flash_read_CFI_bytes(baseAddress, AMD_DEVID_ADDR1, 2).w
+		: 0x0000;
+
+	// Exit back to read array mode
+	AMD_Soft_Reset_Flash();
+
+	return E_PASS;
+}
+
+
+
+void AMD_Soft_Reset_Flash()
+{
+	// Reset Flash to be in Read Array Mode: AMD_RESET written at offset AMD_CMD2_ADDR of the flash base
+	flash_write_cmd(gNorInfo.flashBase,AMD_CMD2_ADDR,AMD_RESET);                  
+}
+
+// AMD Prefix Commands: the two writes issued ahead of the ID, erase, program and buffer-load commands in this file
+void AMD_Prefix_Commands()
+{
+	flash_write_cmd(gNorInfo.flashBase, AMD_CMD0_ADDR, AMD_CMD0);	// first write, relative to the flash base
+	flash_write_cmd(gNorInfo.flashBase, AMD_CMD1_ADDR, AMD_CMD1);	// second write, relative to the flash base
+}
+
+/*
+ * Erase the block at blkAddr (AMD command set).  Returns E_PASS when
+ * flash_data_isequal() matches AMD_BLK_ERASE_DONE at the block, else E_FAIL.
+ */
+uint32_t AMD_Erase(uint32_t blkAddr)
+{
+	uint32_t result;
+
+	// Setup command at the flash base, erase command at the block, each after the prefix commands
+	AMD_Prefix_Commands();
+	flash_write_cmd(gNorInfo.flashBase, AMD_CMD2_ADDR, AMD_BLK_ERASE_SETUP_CMD);
+	AMD_Prefix_Commands();
+	flash_write_cmd(blkAddr, AMD_CMD2_ADDR, AMD_BLK_ERASE_CMD);
+
+	// Poll DQ7 and DQ15 for status (spins without a timeout)
+	while (!flash_issetall(blkAddr, 0, BIT7))
+		;
+
+	result = flash_data_isequal(blkAddr, 0, AMD_BLK_ERASE_DONE) ? E_PASS : E_FAIL;
+
+	AMD_Soft_Reset_Flash();
+	return result;
+}
+
+// Program data at address (AMD command set); polls BIT7/BIT15 against data, BIT5 flags a timeout; E_PASS/E_FAIL
+uint32_t
+AMD_Write(uint32_t address, volatile uint32_t data)
+{
+	uint32_t result = E_PASS;
+
+	// Prefix commands, program command, then the data itself
+	AMD_Prefix_Commands();
+	flash_write_cmd(gNorInfo.flashBase, AMD_CMD2_ADDR, AMD_PROG_CMD);
+	flash_write_data(address, data);
+
+	for (;;) {
+		// Finished as soon as the polled bits match the programmed data
+		if ((flash_read_data(address, 0) & (BIT7 | BIT15)) == (data & (BIT7 | BIT15)))
+			break;
+
+		if (!flash_issetall(address, 0, BIT5))
+			continue;
+		// BIT5 reported: sample once more before declaring a failure
+		if ((flash_read_data(address, 0) & (BIT7 | BIT15)) != (data & (BIT7 | BIT15))) {
+			log_info("Timeout occurred.");
+			result = E_FAIL;
+		}
+		break;
+	}
+
+	// Return Read Mode
+	AMD_Soft_Reset_Flash();
+
+	// Verify the data.
+	if ((result == E_PASS) && (flash_read_data(address, 0) != data))
+		result = E_FAIL;
+
+	return result;
+}
+
+// AMD flash buffered write of numBytes bytes from data[] at address; E_FAIL on bad address, timeout, abort or verify mismatch
+uint32_t
+AMD_BufferWrite(uint32_t address, volatile uint8_t data[], uint32_t numBytes)
+{
+	uint32_t startAddress = address;
+	uint32_t blkAddress, blkSize;
+	uint32_t data_temp;
+	uint32_t retval = E_PASS;
+	uint32_t shift;
+	
+	// Get block base address and size; without a valid block no command may be issued
+	if (DiscoverBlockInfo(address, &blkSize, &blkAddress) != E_PASS)
+		return E_FAIL;
+	// Write the Write Buffer Load command
+	AMD_Prefix_Commands();
+	flash_write_cmd(blkAddress, 0, AMD_WRT_BUF_LOAD_CMD);
+        
+	//Establish correct shift value
+	shift = 0;
+	while ((gNorInfo.busWidth >> shift) > 1)
+		shift++;
+    
+	// Write Length (either numBytes or numBytes/2)	    
+	flash_write_cmd(blkAddress, 0, (numBytes >> shift) - 1);
+	
+	// Write Data
+	flash_write_databuffer(&address,(void*)data, numBytes);
+		
+	// Program Buffer to Flash Confirm Write
+	flash_write_cmd(blkAddress, 0, AMD_WRT_BUF_CONF_CMD);                  
+    
+	// Reference value for the poll, read from the source buffer at the offset left in address by the write above
+	data_temp = flash_read_data((uint32_t) (data + (address - startAddress)), 0);
+        
+	while (true) {
+		// Done once BIT7/BIT15 read from the flash match the reference value
+		if ((flash_read_data(address, 0 ) & (BIT7 | BIT15)) == (data_temp & (BIT7 | BIT15))) {
+			break;
+		} else {
+			// Timeout has occurred
+			if(flash_issetall(address, 0, BIT5)) {
+				if ((flash_read_data(address, 0 ) & (BIT7 | BIT15)) != (data_temp & (BIT7 | BIT15))) {
+					log_info("Timeout occurred.");
+					retval = E_FAIL;
+				}
+				break;
+			}
+			// Abort has occurred
+			if (flash_issetall(address, 0, BIT1)) {
+				if ((flash_read_data(address, 0 ) & (BIT7 | BIT15)) != (data_temp & (BIT7 | BIT15))) {
+					log_info("Abort occurred.");
+					retval = E_FAIL;
+					AMD_Write_Buf_Abort_Reset_Flash ();
+				}
+				break;
+			}
+		}
+	}
+	
+	// Put chip back into read array mode.
+	AMD_Soft_Reset_Flash();
+	if (retval == E_PASS)
+		retval = flash_verify_databuffer(startAddress,(void*)data, numBytes);
+
+	return retval;
+}
+
+// AMD Write Buf Abort Reset Flash: issued after an aborted buffered write (prefix commands, then the soft reset)
+void
+AMD_Write_Buf_Abort_Reset_Flash(void)
+{
+	// Reset Flash to be in Read Array Mode
+	AMD_Prefix_Commands();
+	AMD_Soft_Reset_Flash();
+}
+
+// Look up the erase block containing address: E_PASS with *blockSize / *blockAddr set, E_FAIL otherwise
+uint32_t
+DiscoverBlockInfo(uint32_t address,uint32_t* blockSize, uint32_t* blockAddr)
+{
+	int32_t i;
+	uint32_t currRegionAddr, nextRegionAddr;
+        
+	currRegionAddr = (uint32_t) gNorInfo.flashBase;
+	if ((address < currRegionAddr) || (address >= (currRegionAddr+gNorInfo.flashSize))) {
+		return E_FAIL;
+	}
+	// Walk the erase regions in address order; each spans blockSize[i] * numberBlocks[i] bytes
+	for (i = 0; i < (gNorInfo.numberRegions); i++) {
+		nextRegionAddr = currRegionAddr + (gNorInfo.blockSize[i] * gNorInfo.numberBlocks[i]);
+		if ( (currRegionAddr <= address) && (nextRegionAddr > address) ) {
+			*blockSize = gNorInfo.blockSize[i];
+			*blockAddr = address & (~((*blockSize) - 1));	// assumes power-of-two, size-aligned blocks
+			return E_PASS;
+		}
+		currRegionAddr = nextRegionAddr;
+	}
+	// No region covers the address (region table shorter than flashSize): the outputs were never written
+	return E_FAIL;
+}
+
+uint32_t
+NOR_GlobalErase(void)
+{
+	return NOR_Erase(gNorInfo.flashBase, gNorInfo.flashSize);	/* whole device: flash base plus detected size */
+}
+
+
+uint32_t
+nor_get_flashbase(void)
+{
+	return gNorInfo.flashBase;	/* accessor for the flash base held in the global NOR descriptor */
+}
+
+uint32_t
+NOR_Erase(volatile uint32_t start_address, volatile uint32_t size)
+{
+	/* Erase every block overlapping [start_address, start_address + size), */
+	/* reporting progress on the UART; E_FAIL on the first failure. */
+	volatile uint32_t cursor = start_address;
+	volatile uint32_t limit = start_address + size;
+	uint32_t blockSize, blockAddr;
+
+	log_info("Erasing the NOR Flash");
+
+	while (cursor < limit) {
+		if (DiscoverBlockInfo(cursor, &blockSize, &blockAddr) != E_PASS) {
+			uart_send_str("Address out of range");
+			return E_FAIL;
+		}
+
+		if ((*Flash_Erase)(blockAddr) == E_PASS) {
+			/* Continue with the block that follows the one just erased */
+			cursor = blockAddr + blockSize;
+			uart_send_str("Erased through ");
+			uart_send_hexnum(cursor, 8);
+			uart_send_lf();
+			continue;
+		}
+		uart_send_str("Erase failure at block address ");
+		uart_send_hexnum(blockAddr, 8);
+		uart_send_lf();
+		return E_FAIL;
+	}
+
+	log_info("Erase Completed");
+	return(E_PASS);
+}
+
+// NOR_WriteBytes: program numBytes bytes read from readAddress into flash at writeAddress; E_PASS or E_FAIL
+uint32_t
+NOR_WriteBytes(uint32_t writeAddress, uint32_t numBytes, uint32_t readAddress)
+{
+	uint32_t blockSize, blockAddr;
+	int i;
+	uint32_t retval = E_PASS;
+
+	log_info("Writing the NOR Flash");
+
+	// Make numBytes even if needed
+	if (numBytes & 0x00000001)
+		numBytes++;
+		
+	if (DiscoverBlockInfo(writeAddress, &blockSize, &blockAddr) != E_PASS) {
+		uart_send_str("Address out of range");
+		return E_FAIL;
+	}
+
+	while (numBytes > 0) {
+		if ( (numBytes < gNorInfo.bufferSize) || (writeAddress & (gNorInfo.bufferSize-1) )) {
+			if ((*Flash_Write)(writeAddress, flash_read_data(readAddress,0) ) != E_PASS) {
+				log_info("\r\nNormal Write Failed.");
+				retval = E_FAIL;
+			} else {
+				numBytes     -= gNorInfo.busWidth;
+				writeAddress += gNorInfo.busWidth;
+				readAddress  += gNorInfo.busWidth;
+			}
+		} else {
+			// Try to use buffered writes
+			if ((*Flash_BufferWrite)(writeAddress, (volatile uint8_t *)readAddress, gNorInfo.bufferSize) == E_PASS) {
+				numBytes -= gNorInfo.bufferSize;
+				writeAddress += gNorInfo.bufferSize;
+				readAddress  += gNorInfo.bufferSize;
+			}
+			else {
+				// Try normal writes as a backup
+				for (i = 0; i<(gNorInfo.bufferSize>>1); i++) {
+					if ((*Flash_Write)(writeAddress, flash_read_data(readAddress,0) ) != E_PASS) {
+						log_info("\r\nNormal write also failed");
+						retval = E_FAIL;
+						break;
+					} else {
+						numBytes     -= gNorInfo.busWidth;
+						writeAddress += gNorInfo.busWidth;
+						readAddress  += gNorInfo.busWidth;
+					}
+				}
+			}
+		}
+
+		// Output status info on the write operation
+		if (retval == E_PASS) {    
+			if  ( ((writeAddress & (~((blockSize>>4)-1))) == writeAddress) || (numBytes == 0) ) {
+				uart_send_str("NOR Write OK through ");
+				uart_send_hexnum(writeAddress, 8);
+				uart_send_lf();
+				// Refresh the block info only while data remains: after the last write, writeAddress may legitimately sit one past the end of flash
+				if ((numBytes > 0) && (DiscoverBlockInfo(writeAddress, &blockSize, &blockAddr) != E_PASS)) {
+					uart_send_str("Address out of range");
+					return E_FAIL;
+				}
+			}
+		} else {
+			log_info("NOR Write Failed... Aborting");
+			return E_FAIL;
+		}
+  	}
+
+  	return retval;
+}
diff --git a/nor.h b/nor.h
new file mode 100644
index 0000000..49ccd6c
--- /dev/null
+++ b/nor.h
@@ -0,0 +1,44 @@
+/*
+ * nor.h - NOR flash definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+ 
+#ifndef _NOR_H_
+#define _NOR_H_
+
+#include "common.h"
+#include "davinci.h"
+
+/* Global NOR commands */
+uint32_t NOR_Init (void);
+/* Find the application image in NOR, copy it to RAM and report its entry point (E_PASS / E_FAIL). */
+int nor_copy(uint32_t *jump_entry_point);
+/* Write, erase and block-lookup services; each returns E_PASS on success and E_FAIL otherwise. */
+uint32_t NOR_WriteBytes(uint32_t writeAddress, uint32_t numBytes, uint32_t readAddress);
+uint32_t NOR_GlobalErase(void);
+uint32_t NOR_Erase(uint32_t start_address, uint32_t size);
+uint32_t DiscoverBlockInfo(uint32_t address,uint32_t* blockSize, uint32_t* blockAddr);
+
+/* Returns gNorInfo.flashBase. */
+uint32_t nor_get_flashbase(void);
+
+
+#endif /* _NOR_H_ */
diff --git a/norboot.c b/norboot.c
new file mode 100644
index 0000000..816582c
--- /dev/null
+++ b/norboot.c
@@ -0,0 +1,72 @@
+/*
+ * norboot.c - NOR boot mode functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "nor.h"
+#include "util.h"
+#include "uart.h"
+
+extern NOR_INFO gNorInfo;
+
+/* Locate the application image behind the UBL in NOR flash and copy it to RAM; E_PASS with *jump_entry_point set */
+int
+nor_copy(uint32_t *jump_entry_point)
+{
+	volatile struct nor_boot_t *hdr = 0;
+	volatile uint32_t *appStartAddr = 0;
+	volatile uint32_t *ramPtr = 0;
+	uint32_t blkSize, blkAddress;
+
+	if (NOR_Init() != E_PASS)
+		return E_FAIL;
+
+	/* The header is expected at the start of the block after the one holding flashBase + UBL_IMAGE_SIZE */
+	if (DiscoverBlockInfo((gNorInfo.flashBase + UBL_IMAGE_SIZE), &blkSize,
+			      &blkAddress) != E_PASS) {
+		log_fail("NOR block lookup failed");
+		return E_FAIL;
+	}
+	hdr = (volatile struct nor_boot_t *) (blkAddress + blkSize);
+
+	/* Check for valid magic number. */
+	if ((hdr->magicNum & 0xFFFFFF00) != MAGIC_NUMBER_VALID) {
+		log_fail("No valid header found");
+	 	return E_FAIL;
+	}
+
+	/* Set the source address for copy */
+	appStartAddr = (uint32_t *)(((uint8_t*) hdr) + sizeof(struct nor_boot_t));
+
+	if (hdr->magicNum == UBL_MAGIC_BIN_IMG) {
+		log_fail("Unsupported image format");
+		return E_FAIL;
+	}
+
+	/* Copy data to RAM */
+	ramPtr = (uint32_t *) hdr->ldAddress;
+	memcpy(ramPtr, appStartAddr, hdr->appSize);
+
+	/* Application was read correctly, so set entrypoint */
+	*jump_entry_point = hdr->entryPoint;
+ 	return E_PASS;
+}
diff --git a/uart.c b/uart.c
new file mode 100644
index 0000000..7baefb9
--- /dev/null
+++ b/uart.c
@@ -0,0 +1,287 @@
+/*
+ * uart.c - UART Rx and Tx functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "uart.h"
+#include "util.h"
+#include "crc.h"
+#include "gunzip.h"
+
+/* Symbol from linker script */
+extern uint32_t __DDR_FREE; /* Start of free DDR memory region. */
+extern uint32_t __DDR_END;  /* Last DDR memory address. */
+
+/* Receive data from UART */
+static int
+uart_recv_bytes(size_t count, uint8_t *dest)
+{
+	uint32_t i, status = 0;
+	uint32_t timerStatus = 1;
+
+	for (i = 0; i < count; i++) {
+		/* Enable timer one time */
+		timer0_start();
+		do {
+			status = (UART0->LSR)&(0x01);
+			timerStatus = timer0_status();
+		} while (!status && timerStatus);
+
+		if (timerStatus == 0) {
+			host_msg("UART_TIMEOUT");
+			return E_TIMEOUT;
+		}
+
+		/* Receive byte */
+		dest[i] = (UART0->RBR) & 0xFF;
+
+		/* Check status for errors */
+		if ((UART0->LSR & 0x1C) != 0) {
+			host_msg("UART_RXERR");
+			return E_FAIL;
+		}
+	}
+	return E_PASS;
+}
+
+/* Send the characters of a NUL-terminated string on UART0 (the NUL itself is not sent). */
+static void
+uart_send_bytes(char *string)
+{
+	const int32_t length = strlen(string);
+	int32_t pos;
+
+	for (pos = 0; pos < length; pos++) {
+		uint32_t tx_flag, timer_running;
+		/* Poll LSR bit 0x20, bounded by a timer restarted for every byte */
+		timer0_start();
+		for (;;) {
+			tx_flag = (UART0->LSR) & 0x20;
+			timer_running = timer0_status();
+			if (tx_flag || !timer_running)
+				break;
+		}
+
+		/* Give up silently on timeout; the rest of the string is dropped */
+		if (timer_running == 0)
+			return;
+
+		(UART0->THR) = string[pos];
+	}
+}
+
+/*
+ * Expect string to arrive on the UART byte for byte, optionally followed by
+ * its terminating NUL.  Returns E_FAIL on the first receive error or mismatch.
+ */
+static int
+uart_check_string(char *string, int include_null)
+{
+	int expected = strlen(string);
+	int pos = 0;
+
+	if (include_null != false)
+		expected++;
+
+	while (pos < expected) {
+		uint8_t recv;
+
+		if ((uart_recv_bytes(1, &recv) != E_PASS) || (recv != string[pos]))
+			return E_FAIL;
+		pos++;
+	}
+	return E_PASS;
+}
+
+/* Receive a uint32 value in HEX form (8 bytes) */
+static int
+uart_recv_hex_uint32(uint32_t *data)
+{
+	int k;
+	uint8_t recv[8];
+	uint32_t temp;
+	int shift;
+	const int num_ascii_char = 8;
+
+	/* Get 8 bytes from UART */
+	if (uart_recv_bytes(num_ascii_char, recv) != E_PASS)
+		return E_FAIL;
+
+	*data = 0;
+
+	/* Converting ascii to Hex */
+	for (k = 0, shift = 28; k < num_ascii_char; k++, shift -= 4) {
+		temp = recv[k] - 48;
+
+		if (temp > 22) /* Lower case a,b,c,d,e,f */
+			temp -= 39;
+		else if (temp > 9) /* Upper case A,B,C,D,E,F */
+			temp -= 7;
+
+		*data |= temp << shift;
+	}
+	return E_PASS;
+}
+
+/* Send a CR+LF line ending ("\r\n") to UART. */
+void
+uart_send_lf(void)
+{
+	uart_send_bytes("\r\n");
+}
+
+/* Send a NUL-terminated string to UART, without any line ending. */
+void
+uart_send_str(char *string)
+{
+	uart_send_bytes(string);
+}
+
+/* Send a NUL-terminated string to UART, followed by the CR+LF line ending. */
+void
+uart_send_str_lf(char *string)
+{
+	uart_send_bytes(string);
+	uart_send_lf();
+}
+
+void
+uart_send_hexnum(uint32_t value, int digits)
+{
+	/* Print "0x" plus the low `digits` nibbles of value in upper case; digits is clamped to 0..8 */
+	char seq[9];
+	int i;
+
+	if (digits < 0)
+		digits = 0;
+	else if (digits > 8)
+		digits = 8; /* seq[] holds 8 digits + NUL, and a shift of 32 or more is undefined */
+
+	for (i = 0; i < digits; i++)
+		seq[i] = "0123456789ABCDEF"[(value >> ((digits - 1 - i) * 4)) & 0x0F];
+	seq[digits] = 0;
+
+	uart_send_str("0x");
+	uart_send_bytes(seq);
+}
+
+int
+uart_get_cmd(uint32_t *boot_cmd)
+{
+	/* Expect the literal "    CMD" (NUL included), then the command */
+	/* word as 8 hex digits. */
+	if ((uart_check_string("    CMD", true) == E_PASS) &&
+	    (uart_recv_hex_uint32(boot_cmd) == E_PASS))
+		return E_PASS;
+
+	return E_FAIL;
+}
+
+uint32_t
+uart_get_prog(struct uart_ack_header_t *uart_ack_header)
+{
+	uint32_t error;
+	uint32_t recv_crc, computed_crc;
+	unsigned long inflate_dstbuf_len, inflate_srcbuf_len;
+	uint8_t *ddr_free = (uint8_t *) &__DDR_FREE;
+	/* Receive buffer at the start of free DDR; inflate output MAX_IMAGE_SIZE bytes above it, up to __DDR_END */
+	uart_ack_header->recv_buffer = ddr_free;
+	uart_ack_header->inflate_dstbuf = ddr_free + MAX_IMAGE_SIZE;
+	inflate_dstbuf_len = ((uint8_t *) &__DDR_END) + 1 - uart_ack_header->inflate_dstbuf;
+
+	/* Wait for the "    ACK" sequence (NUL included) from the host */
+	error = uart_check_string("    ACK", true);
+	if (error != E_PASS)
+		return E_FAIL;
+
+	/* Get the ACK header elements */
+	error =  uart_recv_hex_uint32(&uart_ack_header->magic);
+	error |= uart_recv_hex_uint32(&recv_crc);
+	error |= uart_recv_hex_uint32(&uart_ack_header->size);
+	error |= uart_recv_hex_uint32(&uart_ack_header->entry_point);
+	error |= uart_check_string("0000", false);
+	if (error != E_PASS)
+		return E_FAIL;
+
+	uart_send_str("Magic = ");
+	uart_send_hexnum(uart_ack_header->magic, 8);
+	uart_send_str(", CRC = ");
+	uart_send_hexnum(recv_crc, 8);
+	uart_send_str(", Entry = ");
+	uart_send_hexnum(uart_ack_header->entry_point, 8);
+	uart_send_str(", Size = ");
+	uart_send_hexnum(uart_ack_header->size, 8);
+	uart_send_lf();
+
+	/* Verify the size: more than the prefix skipped before gunzip (so the subtraction below cannot wrap), at most the buffer */
+	if ((uart_ack_header->size <= GUNZIP_COMP_BLOCK_OFFSET) ||
+	    (uart_ack_header->size > MAX_IMAGE_SIZE)) {
+		host_msg("BADCNT");
+		return E_FAIL;
+	}
+
+	/* Send BEGIN command */
+	host_msg("BEGIN");
+
+	/* Receive the data over UART */
+	if (uart_recv_bytes(uart_ack_header->size,
+			    uart_ack_header->recv_buffer)
+	    != E_PASS) {
+		return E_FAIL;
+	}
+
+	/* Return first DONE when all data arrives */
+	host_msg("DONE");
+
+	computed_crc = crc32_dv_compute(uart_ack_header->recv_buffer,
+					uart_ack_header->size);
+	if (computed_crc != recv_crc) {
+		host_msg("BADCRC");
+		return E_FAIL;
+	}
+
+	inflate_srcbuf_len = uart_ack_header->size - GUNZIP_COMP_BLOCK_OFFSET;
+
+	error = gunzip(uart_ack_header->inflate_dstbuf, &inflate_dstbuf_len,
+		       &uart_ack_header->recv_buffer[GUNZIP_COMP_BLOCK_OFFSET],
+		       &inflate_srcbuf_len);
+	if (error != 0) {
+		uart_send_str("gzip error = ");
+		uart_send_hexnum(error, 8);
+		uart_send_lf();
+		host_msg("GZIPERR");
+		return E_FAIL;
+	}
+
+	uart_send_str("Deflated data size = ");
+	uart_send_hexnum(inflate_dstbuf_len, 8);
+	uart_send_lf();
+
+	/* From here on the header describes the decompressed image */
+	uart_ack_header->recv_buffer = uart_ack_header->inflate_dstbuf;
+	uart_ack_header->size = inflate_dstbuf_len;
+
+	/* Return DONE when all data is validated */
+	host_msg("DONE");
+
+	return E_PASS;
+}
diff --git a/uart.h b/uart.h
new file mode 100644
index 0000000..b9f0316
--- /dev/null
+++ b/uart.h
@@ -0,0 +1,48 @@
+/*
+ * uart.h - UART Rx and Tx definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _UART_H_
+#define _UART_H_
+
+#include "common.h"
+
+struct uart_ack_header_t {
+	uint32_t magic;		/* magic number received in the ACK header */
+	uint32_t entry_point;	/* entry point received in the ACK header */
+	uint32_t size;		/* received image size; replaced by the decompressed size after gunzip */
+	uint8_t  *recv_buffer;	/* receive buffer; repointed to inflate_dstbuf after gunzip */
+	uint8_t  *inflate_dstbuf;	/* destination buffer for the decompressed image */
+};
+
+void uart_boot(uint32_t *jump_entry_point);
+/* Output helpers: CR+LF, string, string followed by CR+LF, "0x"-prefixed hex number */
+void uart_send_lf(void);
+void uart_send_str(char *string);
+void uart_send_str_lf(char *string);
+void uart_send_hexnum(uint32_t value, int digits);
+/* Receive the "    CMD" sequence and the 8-digit hex command word that follows it */
+int uart_get_cmd(uint32_t *boot_cmd);
+/* Receive, CRC-check and gunzip an image; fills in *uart_ack_header */
+uint32_t uart_get_prog(struct uart_ack_header_t *uart_ack_header);
+
+#endif /* _UART_H_ */
diff --git a/uartboot.c b/uartboot.c
new file mode 100644
index 0000000..f0437c0
--- /dev/null
+++ b/uartboot.c
@@ -0,0 +1,261 @@
+/*
+ * uartboot.c - UART boot mode
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "uart.h"
+#include "util.h"
+#include "crc.h"
+#if defined(FLASH_TYPE_NOR)
+#include "nor.h"
+#elif defined(FLASH_TYPE_NAND)
+#include "nand.h"
+#endif
+
+/* Symbols from linker script */
+extern uint32_t __DDR_START;
+extern uint32_t __DDR_SIZE;
+
+static int
+ddr_memory_test(void)
+{
+	int k;
+	volatile uint32_t *ddr_start = &__DDR_START;
+	const uint32_t ddr_size32 = ((uint32_t) &__DDR_SIZE) / 4;
+	uint32_t read32;
+
+	log_info("DDR tests");
+
+	log_info("1. RAMP test:");
+	for (k = 0; k < ddr_size32; k++)
+		ddr_start[k] = k; /* Write */
+
+	for (k = 0; k < ddr_size32; k++) {
+		read32 = ddr_start[k]; /* Read */
+		if (read32 != k)
+			goto error;
+	}
+	log_info("  Success");
+
+	log_info("2. PATTERN test:");
+	for (k = 0; k < ddr_size32; k++)
+		ddr_start[k] = DDR_TEST_PATTERN; /* Write */
+
+	for (k = 0; k < ddr_size32; k++) {
+		read32 = ddr_start[k]; /* Read */
+		if (read32 != DDR_TEST_PATTERN)
+			goto error;
+	}
+	log_info("  Success");
+
+	host_msg("DDRTEST_SUCCESS");
+	return 0;
+
+error:
+	uart_send_str("Failed at address: ");
+	uart_send_hexnum(k * 4, 8);
+	uart_send_str(", Expected: ");
+	uart_send_hexnum(k, 8);
+	uart_send_str(", Read: ");
+	uart_send_hexnum(read32, 8);
+	uart_send_lf();
+	host_msg("DDRTEST_FAILURE");
+	return -1;
+}
+
+/*
+ * Run the UART boot protocol: send BOOTPSP, read one command from the host
+ * and execute it.  *jump_entry_point is set to the downloaded application's
+ * entry point for the default (load and run) command, and to 0 (reset) for
+ * every other command and on any error.
+ */
+void
+uart_boot(uint32_t *jump_entry_point)
+{
+#if defined(FLASH_TYPE_NAND)
+	struct nand_image_descriptor_t im_desc;
+#elif defined(FLASH_TYPE_NOR)
+	struct nor_boot_t norBoot;
+	uint32_t blkAddress, blkSize, baseAddress;
+#endif
+	struct uart_ack_header_t uart_ack_header;
+	uint32_t boot_cmd;
+
+	crc32_dv_build_table();
+	log_info("Starting UART Boot");
+	host_msg("BOOTPSP");
+
+	/* Get the BOOT command */
+	if (uart_get_cmd(&boot_cmd) != E_PASS)
+		goto uartboot_error;
+
+	/* Set the entry point to reset by default */
+	*jump_entry_point = 0x0;
+
+	switch (boot_cmd) {
+	case UBL_CMD_DDR_TEST:
+		/* Perform DDR memory testing; it reports the result itself. */
+		ddr_memory_test();
+		break;
+
+		/* Download via UART UBL and APP and burn to flash. */
+	case UBL_CMD_FLASH_UBL_APP:
+		host_msg("SENDUBL");
+		/* Download UBL in GZIP format */
+		if (uart_get_prog(&uart_ack_header) != E_PASS)
+			goto uartboot_error;
+
+		log_info("Writing UBL");
+#if defined(FLASH_TYPE_NOR)
+		NOR_Erase(nor_get_flashbase(), uart_ack_header.size);
+		/* Write binary UBL to NOR flash. */
+		NOR_WriteBytes(nor_get_flashbase(), uart_ack_header.size,
+			       (uint32_t) uart_ack_header.recv_buffer);
+#elif defined(FLASH_TYPE_NAND)
+		im_desc.magic = uart_ack_header.magic;
+		im_desc.entry_point = uart_ack_header.entry_point;
+		im_desc.block_num = START_UBL_BLOCK_NUM;
+		im_desc.load_address = 0; /* Load address not used by RBL */
+
+		if (nand_write_prog(&im_desc, uart_ack_header.recv_buffer,
+				    uart_ack_header.size) != E_PASS)
+			goto uartboot_error;
+#endif
+
+		/* Indicate that UBL flashing was successful. */
+		host_msg("DONE");
+		host_msg("SENDAPP");
+
+		/* Get the application header and data */
+		if (uart_get_prog(&uart_ack_header) != E_PASS)
+			goto uartboot_error;
+
+		log_info("Writing APP");
+#if defined(FLASH_TYPE_NOR)
+		/* Erase the NOR flash where header and data will go */
+		DiscoverBlockInfo((nor_get_flashbase() + UBL_IMAGE_SIZE),
+				  &blkSize, &blkAddress);
+		baseAddress = blkAddress + blkSize;
+		NOR_Erase(baseAddress, uart_ack_header.size + sizeof(norBoot));
+
+		/* MagicFlag for Application (binary or safe) */
+		norBoot.magicNum = uart_ack_header.magic;
+		/* Bytes of application (either srec or binary) */
+		norBoot.appSize = uart_ack_header.size;
+		/* Value from ACK header */
+		norBoot.entryPoint = uart_ack_header.entry_point;
+		/* Semi-hardcoded load address to entry point. FIXME */
+		norBoot.ldAddress = uart_ack_header.entry_point;
+
+		/* Write the struct nor_boot_t header to the flash */
+		NOR_WriteBytes(baseAddress, sizeof(norBoot),
+			       (uint32_t) &norBoot);
+
+		/* Write the application data to the flash */
+		NOR_WriteBytes((baseAddress + sizeof(norBoot)),
+			       uart_ack_header.size,
+			       (uint32_t) uart_ack_header.recv_buffer);
+
+		/* NOTE(review): image already written above; confirm this call */
+		if (nor_write_prog(&norBoot, uart_ack_header.recv_buffer,
+				   uart_ack_header.size,
+				   baseAddress + sizeof(norBoot),
+				   uart_ack_header.magic,
+				   uart_ack_header.entry_point,
+				   uart_ack_header.entry_point) != E_PASS)
+			goto uartboot_error;
+#elif defined(FLASH_TYPE_NAND)
+		im_desc.magic = uart_ack_header.magic;
+		im_desc.entry_point = uart_ack_header.entry_point;
+		im_desc.block_num = START_APP_BLOCK_NUM;
+		/* Assuming load address is identical to entry point. */
+		im_desc.load_address = uart_ack_header.entry_point;
+
+		if (nand_write_prog(&im_desc, uart_ack_header.recv_buffer,
+				    uart_ack_header.size) != E_PASS)
+			goto uartboot_error;
+#endif
+
+		/* Indicate that APP flashing was successful. */
+		host_msg("DONE");
+		break;
+
+	case UBL_CMD_FLASH_DATA:
+#if defined(FLASH_TYPE_NAND)
+		host_msg("SENDDATA");
+		/* Get the data block infos and actual bytes */
+		if (uart_get_prog(&uart_ack_header) != E_PASS)
+			goto uartboot_error;
+
+		log_info("Writing DATA");
+		/* The header's entry_point field carries the target block. */
+		im_desc.magic = uart_ack_header.magic;
+		im_desc.block_num = uart_ack_header.entry_point;
+		im_desc.entry_point = 0; /* don't pass uninitialized fields */
+		im_desc.load_address = 0;
+
+		if (nand_write_prog(&im_desc, uart_ack_header.recv_buffer,
+				    uart_ack_header.size) != E_PASS)
+			goto uartboot_error;
+
+		/* Indicate that DATA flashing was successful. */
+		host_msg("DONE");
+		break;
+#else
+		/* im_desc is only declared for NAND builds. */
+		log_info("Data flashing requires NAND");
+		goto uartboot_error;
+#endif
+
+	case UBL_CMD_FLASH_ERASE:
+		log_info("Erasing whole flash");
+#if defined(FLASH_TYPE_NOR)
+		if (NOR_GlobalErase() != E_PASS) {
+			log_info("Erase failed");
+			goto uartboot_error;
+		}
+#elif defined(FLASH_TYPE_NAND)
+		if (nand_erase_all() != E_PASS) {
+			log_info("Erase failed");
+			goto uartboot_error;
+		}
+#endif
+		log_info("Erase successfull");
+		break;
+
+	default:
+		/* Load and run application */
+		host_msg("SENDAPP");
+		if (uart_get_prog(&uart_ack_header) != E_PASS)
+			goto uartboot_error;
+
+		*jump_entry_point = uart_ack_header.entry_point;
+		break;
+	} /* end switch statement */
+
+	return;
+
+uartboot_error:
+	/* Set the entry point to reset. */
+	*jump_entry_point = 0x0;
+}
diff --git a/ubl.c b/ubl.c
new file mode 100644
index 0000000..0e7e1ed
--- /dev/null
+++ b/ubl.c
@@ -0,0 +1,251 @@
+/*
+ * ubl.c - main file
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "uart.h"
+#include "util.h"
+#if defined(FLASH_TYPE_NOR)
+#include "nor.h"
+#elif defined(FLASH_TYPE_NAND)
+#include "nand.h"
+#endif
+
+#define C1_IC	(1 << 12) /* icache off/on */
+
+static uint32_t jump_entry_point;
+
+enum bootmode_t bootmode;
+
+/* Return the value of co-processor 15, register #1 (control register). */
+static uint32_t
+read_p15_c1(void)
+{
+	uint32_t value;
+
+	__asm__ __volatile__(
+		"mrc    p15, 0, %0, c1, c0, 0   @ read control reg\n"
+		: "=r" (value)	/* output operand %0 */
+		:		/* no input operands */
+		: "memory");	/* "memory" clobber declared to the compiler */
+
+	return value;
+}
+
+/* Write value to co-processor 15, register #1, then read the register back. */
+static void
+write_p15_c1(uint32_t value)
+{
+	__asm__ __volatile__(
+		"mcr    p15, 0, %0, c1, c0, 0   @ write it back\n"
+		:		/* no output operands */
+		: "r" (value)	/* input operand %0 */
+		: "memory");
+
+	read_p15_c1();	/* result discarded; purpose of read-back: TODO confirm */
+}
+
+/* Short busy-wait: counts a volatile variable from 0 up to 100. */
+static void
+cp_delay(void)
+{
+	volatile int spin = 0;
+	/* copro seems to need some delay between reading and writing */
+	while (spin < 100)
+		spin++;
+}
+
+/* Set the C1_IC (icache on) bit in the CP15 control register. */
+static void
+icache_enable(void)
+{
+	const uint32_t ctrl = read_p15_c1();	/* current control reg. */
+
+	cp_delay();	/* pause between the read and the write */
+	write_p15_c1(ctrl | C1_IC);
+}
+
+/*
+ * Init platform and flash, then get the application from flash or UART
+ * per bootmode.  Returns E_PASS, or E_FAIL with jump_entry_point = 0.
+ */
+static int
+ubl_main(void)
+{
+	int need_uart_boot = 0;
+	int status;
+
+	/* Read boot mode */
+	bootmode = (enum bootmode_t) (((SYSTEM->BOOTCFG) & 0xC0) >> 6);
+
+	/* Wait until the RBL is done using the UART. */
+	if (bootmode == NON_SECURE_UART)
+		while ((UART0->LSR & 0x40) == 0);
+
+	status = davinci_platform_init(UBL_VERSION_STR);
+	if (status != E_PASS)
+		goto error;
+
+#if defined(FLASH_TYPE_NOR)
+	status = NOR_Init();
+#elif defined(FLASH_TYPE_NAND)
+	status = nand_init();
+#endif
+	if (status != E_PASS) {
+		uart_send_str("flash init failed");
+		goto error;
+	}
+
+	uart_send_str("BootMode = ");
+	switch (bootmode) {
+#if defined(FLASH_TYPE_NAND)
+	case NON_SECURE_NAND:
+		log_info("NAND"); /* Report boot mode to host */
+		/* Copy binary application data from NAND to DDRAM */
+		if (nand_copy(&jump_entry_point) != E_PASS) {
+			log_info("Boot failed.");
+			need_uart_boot = 1;
+		}
+		break;
+#elif defined(FLASH_TYPE_NOR)
+	case NON_SECURE_NOR:
+		log_info("NOR"); /* Report boot mode to host */
+		/* Copy binary application data from NOR to DDRAM */
+		if (nor_copy() != E_PASS) {
+			log_info("Boot failed.");
+			need_uart_boot = 1;
+		}
+		break;
+#endif
+	case NON_SECURE_UART:
+		log_info("UART"); /* Report boot mode to host */
+		need_uart_boot = 1;
+		break;
+	default: /* any other mode uses UART boot, without a report */
+		need_uart_boot = 1;
+		break;
+	}
+	if (need_uart_boot)	/* also reached after a failed flash boot */
+		uart_boot(&jump_entry_point);
+
+	waitloop(10000);
+	/* Disabling UART timeout timer */
+	while ((UART0->LSR & 0x40) == 0)
+		;
+	TIMER0->TCR = 0x00000000;
+	return E_PASS;
+
+error:
+	jump_entry_point = 0; /* Reset */
+	return E_FAIL;
+}
+
+/*
+ * boot() is the entry point (ENTRY(boot) in ubl.lds): it enters supervisor
+ * mode with IRQ/FIQ masked, selects the low vector table, sets up the
+ * stack, enables the icache, runs ubl_main() and jumps to jump_entry_point.
+ * It is naked (saves no registers: it is the entry point and never
+ * returns), which requires the gnu compiler to use the -nostdlib option.
+ */
+__attribute__((naked, section(".boot"))) void boot(void);
+
+void
+boot(void)
+{
+	void (*app_entry_function)(void);
+	extern uint32_t __topstack; /* symbol defined in linker script */
+	register uint32_t *stackpointer asm("sp");
+
+	asm(" MRS	r0, cpsr");
+	asm(" BIC	r0, r0, #0x1F"); /* Clear MODES */
+	asm(" ORR	r0, r0, #0x13"); /* Set SUPERVISOR mode */
+	asm(" ORR	r0, r0, #0xC0"); /* Disable FIQ and IRQ */
+	asm(" MSR	cpsr, r0");
+
+	/* Set the IVT to low memory, leave MMU & caches disabled */
+	asm(" MRC	p15, 0, r1, c1, c0, 0");
+	asm(" BIC	r1, r1, #0x00002000"); /* clear bit 13 in the value written back */
+	asm(" MCR	p15, 0, r1, c1, c0, 0");
+
+	/* Stack setup */
+	stackpointer = &(__topstack);
+
+	icache_enable();
+	ubl_main();	/* result ignored: on failure jump_entry_point is 0 */
+
+	uart_send_str("Starting app at: ");
+	uart_send_hexnum((uint32_t) jump_entry_point, 8);
+	uart_send_lf();
+
+	/* Jump to entry point */
+	app_entry_function = (void *) jump_entry_point;
+	(*app_entry_function)();
+}
+
+/*
+ * selfcopy() is the entry point when the UBL is found at the base of the
+ * NOR Flash: it enables ITCM/DTCM, copies words from __selfcopysrc to
+ * __selfcopydest and goes directly to boot().  Both functions are naked
+ * (no registers saved); this requires the gnu compiler's -nostdlib option.
+ */
+#if defined(FLASH_TYPE_NOR)
+__attribute__((naked, section(".selfcopy"))) void selfcopy(void);
+
+void
+selfcopy(void)
+{
+	extern uint32_t __selfcopysrc, __selfcopydest, __selfcopydestend;
+	volatile uint32_t *src = &(__selfcopysrc);
+	volatile uint32_t *dest = &(__selfcopydest);
+	volatile uint32_t *destend = &(__selfcopydestend);
+
+	/* Enable ITCM */
+	asm(" MRC	p15, 0, r0, c9, c1, 1");
+	asm(" MOV	r0, #0x1");
+	asm(" MCR	p15, 0, r0, c9, c1, 1");
+
+	/* Enable DTCM */
+	asm(" MRC	p15, 0, r0, c9, c1, 0");
+	asm(" MOV	r0, #0x8000");
+	asm(" ORR	r0, r0, #0x1");
+	asm(" MCR	p15, 0, r0, c9, c1, 0");
+
+	/* Copy the words */
+	while (dest < destend) {
+		*dest = *src;
+		dest++;
+		src++;
+	}
+
+	/* Jump to the normal entry point */
+	boot();
+}
+
+__attribute__ ((naked, section(".fakeentry"))) void fake_entry(void);
+/* Lives in .fakeentry (located at 0x20 by ubl.lds) and only enters boot(). */
+void
+fake_entry(void)
+{
+	boot();
+}
+#endif /* FLASH_TYPE_NOR */
diff --git a/ubl.lds b/ubl.lds
new file mode 100644
index 0000000..f6af652
--- /dev/null
+++ b/ubl.lds
@@ -0,0 +1,103 @@
+/*
+ * ubl.lds - UBL linker script file
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+ENTRY(boot)
+SECTIONS {
+	/* Common definitions */
+	__EMIF_START  = 0x02000000;
+	__EMIF_SIZE   = 0x02000000;
+	__IRAM_START  = 0x00000000;
+	__DDR_START   = 0x80000000;
+
+	STACKStart = __DRAM_START + __DRAM_SIZE;
+	__topstack = (__DRAM_START + __DRAM_SIZE) - 0x4; /* sp set by boot() */
+	/* NOTE: __DRAM_START, __DRAM_SIZE, __DDR_SIZE are not assigned here */
+	. = __EMIF_START;
+
+	__CS2start = .;
+	__NORFlash = .;
+	__NANDFlash = .;
+
+	.selfcopy : AT (0x0)	/* holds selfcopy() from ubl.c */
+	{
+		*(.selfcopy)
+		. = ALIGN(256);
+	}
+
+	__selfcopysrc = .;	/* selfcopy() reads from here */
+	__IVT = __DRAM_START;
+	__selfcopydest = __DRAM_START + 0x00000020; /* ... and writes here */
+
+	. = 0x20;
+	.fakeentry : AT ( LOADADDR(.selfcopy) + SIZEOF(.selfcopy))
+	{
+		*(.fakeentry)
+		. = ALIGN(4);
+	}
+
+	.text		: AT ( LOADADDR(.fakeentry) + SIZEOF(.fakeentry) )
+	{
+		*(.text)
+		. = ALIGN(4);
+	}
+	.boot		: AT ( LOADADDR(.text) + SIZEOF(.text))
+	{
+		*(.boot)
+		. = ALIGN(4);
+	}
+
+        /* Move into DRAM for placing const and data sections */
+        . += (__DRAM_START - __IRAM_START);
+
+	.rodata		: AT ( LOADADDR(.boot) + SIZEOF(.boot) )
+	{
+		*(.rodata*)
+		*(.rodata)
+		. = ALIGN(4);
+	}
+
+	.data		: AT ( LOADADDR(.rodata) + SIZEOF(.rodata) )
+	{
+		*(.data)
+		. = ALIGN(4);
+	}
+	/* End of the range selfcopy() fills: sum of the loaded sections above */
+	__selfcopydestend = __selfcopydest + SIZEOF(.fakeentry) +
+						SIZEOF(.text) + SIZEOF(.boot) +
+						SIZEOF(.data) + SIZEOF(.rodata);
+
+	.bss		:
+	{
+		*(.bss) *(COMMON)
+		. = ALIGN(4);
+	}
+
+	/* DDR2 */
+	. = __DDR_START;
+	.ddrram (NOLOAD) :
+	{
+		*(.ddrram)
+	}
+	__DDR_FREE = .;
+        __DDR_END  = __DDR_START + __DDR_SIZE - 1;
+}
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..2031126
--- /dev/null
+++ b/util.c
@@ -0,0 +1,83 @@
+/*
+ * util.c - miscellaneous functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "common.h"
+
+#define MAXSTRLEN 256
+
+/* Endian swap of the 32-bit word at *data, in place, via ENDIAN_SWAP() */
+void
+endian_data(uint32_t *data)
+{
+	const uint32_t original = *data;
+
+	*data = ENDIAN_SWAP(original);
+}
+
+void *
+memcpy(void *dest, const void *src, size_t n)
+{
+	/* Plain forward byte copy; dest is returned unchanged. */
+	const uint8_t *from = src;
+	uint8_t *to = dest;
+
+	while (n > 0) {
+		*to++ = *from++;
+		n--;
+	}
+
+	return dest;
+}
+
+/* Bounded strlen: (size_t) -1 when no NUL is found in MAXSTRLEN chars */
+size_t
+strlen(const char *s)
+{
+	size_t len;
+
+	for (len = 0; len < MAXSTRLEN; len++) {
+		if (s[len] == 0)
+			return len;
+	}
+
+	/* Limit reached; s[MAXSTRLEN] is deliberately never read. */
+	return (size_t) -1;
+}
+
+/* Busy-wait: one NOP per count; returns at once when loopcnt <= 0 */
+void
+waitloop(int32_t loopcnt)
+{
+	for (; 0 < loopcnt; --loopcnt)
+		asm("   NOP");
+}
+
+void
+sleep_ms(int ms)
+{
+	for (; 0 < ms; --ms)	/* one waitloop(20000) per requested ms */
+		waitloop(20000);
+}
diff --git a/util.h b/util.h
new file mode 100644
index 0000000..e36bb47
--- /dev/null
+++ b/util.h
@@ -0,0 +1,35 @@
+/*
+ * util.h - miscellaneous functions prototypes
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _UTIL_H_
+#define _UTIL_H_
+
+#include <stdint.h>
+#include <string.h> /* For memcpy & memset prototypes */
+
+void endian_data(uint32_t *data);	/* in-place ENDIAN_SWAP of *data */
+
+void waitloop(int32_t loopcnt);	/* busy-wait, one NOP per count */
+void sleep_ms(int ms);		/* waitloop(20000) repeated ms times */
+
+#endif /* _UTIL_H_ */