commit 4b779cf0d2743a6be48d33524e191ec4edcebf35
Author: Holger Hans Peter Freyther <holger@freyther.de>
Date:   Wed Apr 25 09:20:50 2012 +0200

    dvnixload/ubl copy as of pre 0.2.6 svn

diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..41e776c
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,18 @@
+v0.2.3
+	Mini-DAS: Enable DSP1 & DSP2 power.
+
+v0.2.4
+	Mini-DAS: Enable DSP1 & DSP2 power.
+	          Deactivate DSP1 & DSP2 reset lines.
+	
+
+v0.2.5
+	Mini-DAS: Enable all power supplies and deactivate
+	          all peripheral reset lines.
+
+v0.2.6
+	Mini-DAS: Add delay after applying DSP power and
+	          before releasing DSP reset lines.
+
+v0.2.7
+	Mini-DAS: Open-drain output for CAMERA RESET.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8f7379c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,201 @@
+#
+# Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ifndef CROSS_COMPILE
+CROSS_COMPILE=arm-linux-
+endif
+
+.PHONY : all clean check # command names, never files produced by the build
+
+CC=$(CROSS_COMPILE)gcc
+LD=$(CROSS_COMPILE)ld
+
+CFLAGS := -c -Os -Wall
+LDFLAGS := -Map ubl.map -nostdlib
+
+SOURCES := davinci.c uart.c uartboot.c ubl.c util.c gpio.c crc.c gunzip.c
+
+# Boards setup
+ifeq ($(BOARD),dvevm)
+# EVM for DM6446
+	PLATFORM   := DM644x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H64M16BT_3_162MHZ
+	DDR_SIZE   := 0x10000000 # 256MB
+endif
+ifeq ($(BOARD),sffsdr)
+	PLATFORM   := DM644x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H32M16BN_3_162MHZ
+	DDR_SIZE   := 0x08000000 # 128MB
+endif
+ifeq ($(BOARD),das)
+	PLATFORM   := DM644x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H64M16HR_3_162MHZ
+	DDR_SIZE   := 0x10000000 # 256MB
+endif
+ifeq ($(BOARD),minidas)
+	PLATFORM   := DM35x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H128M16HG_3IT_171MHZ
+	DDR_SIZE   := 0x10000000 # 256MB
+endif
+ifeq ($(BOARD),afeusb)
+	PLATFORM   := DM35x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H32M16BN_3_171MHZ
+	DDR_SIZE   := 0x04000000 # 64MB
+endif
+ifeq ($(BOARD),dm355evm)
+	PLATFORM   := DM35x
+	FLASH_TYPE := FLASH_TYPE_NAND
+	DDR_TYPE   := MICRON_MT47H64M16BT_37E_171MHZ
+	DDR_SIZE   := 0x08000000 # 128MB
+endif
+ifeq ($(BOARD),nor)
+# Only for testing NOR flash compilation
+	PLATFORM   := DM35x
+	FLASH_TYPE := FLASH_TYPE_NOR
+	DDR_TYPE   := MICRON_MT47H64M16BT_37E_171MHZ
+	DDR_SIZE   := 0x08000000 # 128MB
+endif
+
+# Record the selected board in config.h. The file is rewritten (and so gets a
+# new timestamp) only when its current content differs from $(BOARD).
+OLDBOARD := $(shell cat config.h 2> /dev/null)
+ifneq ($(OLDBOARD),$(BOARD))
+$(shell   echo "$(BOARD)" > config.h)
+endif
+
+CFLAGS += -D${PLATFORM} -D${FLASH_TYPE} -D$(DDR_TYPE) -Dboard_$(BOARD)
+
+# Processor type setup
+# The Instruction and Data accesses are differentiated via accessing different
+# memory map regions. The instruction region at 0x0000 and data region at
+# 0x8000 (0x10000 for DM35x) map to the same physical TCM RAM.
+ifeq ($(PLATFORM),DM644x)
+	SOURCES += dm644x.c
+	IRAM_SIZE  := 0x00004000
+	DRAM_START := 0x00008000
+	DRAM_SIZE  := 0x00004000
+endif
+ifeq ($(PLATFORM),DM35x)
+	SOURCES += dm35x.c
+	IRAM_SIZE  := 0x00008000
+	DRAM_START := 0x00010000
+	DRAM_SIZE  := 0x00008000
+endif
+
+LDFLAGS += --defsym __DDR_SIZE=$(DDR_SIZE) \
+           --defsym __IRAM_SIZE=$(IRAM_SIZE) \
+           --defsym __DRAM_START=$(DRAM_START) \
+           --defsym __DRAM_SIZE=$(DRAM_SIZE) \
+           -T ubl.lds
+
+# NAND flash setup
+ifeq ($(FLASH_TYPE),FLASH_TYPE_NAND)
+	SOURCES += nandboot.c nand.c
+endif
+ifeq ($(FLASH_TYPE),FLASH_TYPE_NOR)
+	SOURCES += norboot.c nor.c
+endif
+
+OBJECTS := $(patsubst %.c,%.o,$(SOURCES))
+EXECUTABLE := ubl.elf
+BINARY := $(EXECUTABLE)
+
+DEPS_DIR := .deps
+# Creation of the dependencies directory
+$(shell mkdir -p $(DEPS_DIR))
+
+ifneq ($(MAKECMDGOALS),clean)
+ifndef BOARD
+all:
+	@echo "You must select a board."
+	@echo "List of supported boards: dvevm sffsdr das minidas afeusb dm355evm"
+	@echo "Example:"
+	@echo "  make BOARD=sffsdr"; exit 1
+else # BOARD is set: PLATFORM is defined only when the name matched a board
+ifndef PLATFORM
+all:
+	@echo "Invalid board"; exit 1
+else # known board: the default goal builds the bootloader image
+all: $(BINARY)
+endif # PLATFORM
+endif # BOARD
+endif # goal is not 'clean'
+
+# Including the dependency files (except during clean rules, so Make won't
+# create them only to immediately remove them again). Each one of them will
+# become a target in this Makefile (that is why the 'include' command must be
+# placed after the 'all' target). If a dependency file is not found or is out
+# of date, it is built or updated.
+# If any have actually been changed, Make restarts with a clean state and
+# reads all the dependency makefiles over again.
+ifneq ($(MAKECMDGOALS),clean)
+ifneq "$(SOURCES)" ""
+ifdef BOARD
+-include $(patsubst %.c,$(DEPS_DIR)/%.d,$(SOURCES))
+endif
+endif
+endif
+
+clean:
+	-@rm -f -v *.o $(EXECUTABLE)
+	-@rm -f -v *.map
+	-@rm -f -v *~
+	-@rm -f -v config.h
+	-@rm -f -r $(DEPS_DIR)
+
+check:
+	-@checkpatch.pl --no-tree --file *.c *.h | more
+
+$(EXECUTABLE): $(OBJECTS) ubl.lds
+	$(LD) $(LDFLAGS) $(OBJECTS) -o $@
+
+# Every object depends on the automatically generated config.h, so that
+# everything is recompiled when we change boards (ubl.lds only forces a relink).
+$(OBJECTS): config.h
+
+# The preprocessor of the compiler is used to generate a string representing
+# the dependencies of the input file. This is done invoking the compiler with
+# the -MM option (like -M but omit system header files). The purpose of the
+# sed script is to add the name of the dependency file (.d) to the string
+# returned by the preprocessor, like in the following example:
+#   "main.o: main.c main.h" would become "main.o .deps/main.d : main.c main.h"
+# The MAKE '$*' automatic variable represents the stem with which an implicit
+# rule match. This would be 'main' in the above example.
+#
+# The board/platform selection is passed with -D options stored in CFLAGS, and
+# the headers pull in different files depending on them, so those options (and
+# any -I option) are handed to the preprocessor as well. Bare '-D' words, left
+# over when an invalid board is selected, are dropped.
+#
+# The whole command is one single-quoted argument of '$(SHELL) -ec'; each
+# '\'' sequence inserts a literal single quote around the sed script. MAKE
+# expands '$<', '$*' and '$@' before the shell runs.
+#
+# The exit status of the pipeline is the one of sed, so a failure of $(CC) is
+# not detected by the `-e' flag. Instead, a dependency file left empty by a
+# failed $(CC) is removed, so that it is generated again on the next run.
+dep_cppflags := $(filter-out -D,$(filter -D% -I%,$(CFLAGS)))
+$(DEPS_DIR)/%.d: %.c
+	@echo "Generating dependencies for $<"
+	@$(SHELL) -ec '$(CC) -MM $(CPPFLAGS) $(dep_cppflags) $< | \
+	  sed '\''s/\($*\)\.o[ :]*/\1.o $(DEPS_DIR)\/$*.d : /g'\'' > $@; \
+	  [ -s $@ ] || rm -f $@'
diff --git a/README b/README
new file mode 100644
index 0000000..99207f6
--- /dev/null
+++ b/README
@@ -0,0 +1,21 @@
+ README for HVUBL
+
+This UBL can be used for flashing itself and a 2nd stage bootloader (usually
+U-boot) in flash memory.
+
+It can also be used to flash an arbitrary data image into flash, without
+a header.
+
+It can also be used to run DDR RAM memory testing.
+
+To compile HVUBL for the sffsdr board, for example, run:
+  $> make BOARD=sffsdr
+
+The Makefile honors the CROSS_COMPILE environment variable to specify the prefix
+of your ARM gcc toolchain. If it is not set, it defaults to:
+  CROSS_COMPILE=arm-linux-
+
+You can override it like this, for example:
+  $> make CROSS_COMPILE=arm-angstrom-linux-gnueabi- BOARD=sffsdr
+
+The output file, in ARM ELF format, will be named <ubl.elf>
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..fdba966
--- /dev/null
+++ b/TODO
@@ -0,0 +1,7 @@
+ TODO
+
+-Define DDR bus width and number of banks for each board.
+-NAND write & read page: do bound checking on
+ block number < maximum number of blocks.
+-When writing something other than UBL, use
+ Linux and U-Boot standard ECC layout.
diff --git a/board.h b/board.h
new file mode 100644
index 0000000..cc4df5b
--- /dev/null
+++ b/board.h
@@ -0,0 +1,76 @@
+/*
+ * board.h - board definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _BOARD_H_
+#define _BOARD_H_
+
+#include "common.h"
+#include "davinci.h"
+
+#if defined(board_dvevm)
+#  define PINMUX1_DEFAULT PINMUX1_UART0
+
+#elif defined(board_sffsdr)
+#  define PINMUX1_DEFAULT (PINMUX1_UART0 | PINMUX1_UART1 | PINMUX1_I2C | \
+	PINMUX1_ASP) /* parenthesized so it expands safely in any expression */
+
+#elif defined(board_das)
+#  define PINMUX0_DEFAULT (PINMUX0_VLYNQEN | VLYNQ_WIDTH_4)
+#  define PINMUX1_DEFAULT (PINMUX1_UART0 | PINMUX1_UART2 | PINMUX1_I2C | \
+	PINMUX1_SPI) /* parenthesized so it expands safely in any expression */
+
+#elif defined(board_dm355evm)
+#  define PINMUX0_DEFAULT 0x00007F55 /* All Video Inputs */
+#  define PINMUX1_DEFAULT 0x00145555 /* All Video Outputs */
+#  define PINMUX2_DEFAULT 0x00000004 /* EMIFA */
+#  define PINMUX3_DEFAULT 0x1BFF55FF /* SPI0, SPI1, UART1, I2C, SD0, SD1,
+				      * ASP0, CLKOUTs */
+#  define PINMUX4_DEFAULT 0x00000000 /* MMC/SD0 instead of MS, SPI0 */
+
+#elif defined(board_minidas)
+#  define PINMUX0_DEFAULT 0x00005C00 /* 8-bits video input, rest is GPIOs. */
+#  define PINMUX1_DEFAULT 0x00430000 /* All GPIOs (temporary: no PWM1 for buzzer) */
+#  define PINMUX2_DEFAULT 0x00000C0A /* EMIF A3:13, CE0 & CE1. */
+#  define PINMUX3_DEFAULT 0x0B7BAAC0 /* SPI0, SPI1, UART1, UART2, I2C, SD0,
+				      * CLKOUT1, CLKOUT2 */
+#  define PINMUX4_DEFAULT 0x00000001 /* MMC/SD0 + SPI0_SDI */
+
+/* Optional GPIO used as a status LED. Make sure to enable the corresponding
+ * PINMUX bit. */
+#define STATUS_LED    GPIO(71)
+#define DSP1_PWR_ENA  GPIO(95)
+#define DSP2_PWR_ENA  GPIO(94)
+#define HDD_ENA       GPIO(96)
+#define FULL_ENA      GPIO(68)
+#define ALCOHOL_ENA   GPIO(73)
+#define CAMERA_RESETn GPIO(72)
+#define FAN           GPIO(81)
+#define BUZZER        GPIO(80)
+#define WIFI_RESETn   GPIO(79)
+#define GPS_RESETn    GPIO(78)
+#define CAN_RESETn    GPIO(77)
+#define ATA_RESETn    GPIO(76)
+
+#endif
+
+#endif /* _BOARD_H_ */
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..76d52ac
--- /dev/null
+++ b/common.h
@@ -0,0 +1,98 @@
+/*
+ * common.h - common definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _COMMON_H_
+#define _COMMON_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h> /* For size_t */
+
+#include "board.h"
+
+/* Return types */
+#define E_PASS    0
+#define E_FAIL    1
+#define E_TIMEOUT 2
+
+/* Define this to have more verbose NAND debug messages */
+/* #define NAND_DEBUG 1 */
+
+/* Define this to write a RAMP into NAND for debugging. */
+/* #define NAND_DEBUG_WRITE_RAMP 1 */
+
+#define UBL_VERSION_STR "HV-UBL v0.2.11"
+
+/* Define this for bypassing the ECC check when reading from the NAND.
+ * This is useful for debugging or during development. */
+#define NAND_BYPASS_READ_PAGE_ECC_CHECK 1
+
+#define MAGIC_NUMBER_MASK       0xFFFFFF00
+#define MAGIC_NUMBER_VALID      0xA1ACED00
+
+/* RBL magic numbers */
+#define RBL_MAGIC_SAFE		0xA1ACED00 /* Describes UBL flash image type for
+					    * RBL. */
+
+/* UBL magic numbers */
+#define UBL_MAGIC_BIN_IMG	0xA1ACED66 /* Describes binary flash image type
+					    * for UBL. */
+#define UBL_MAGIC_GZIP_IMG	0xA1ACED77 /* Describes gzipped binary flash
+					    * image type for UBL. */
+
+/* UBL commands */
+#define UBL_CMD_FLASH_UBL_APP	0xA1ACEDCC /* Download UBL & application via
+					    * UART and burn in flash. */
+#define UBL_CMD_FLASH_DATA	0xA1ACEDCD /* Download data via UART and
+					    * burn in flash (no header in flash). */
+#define UBL_CMD_FLASH_ERASE	0xA1ACEDCE /* Erase the whole flash. */
+#define UBL_CMD_RUN_APP		0xA1ACEDDD /* Load and run application via UART. */
+#define UBL_CMD_DDR_TEST	0xA1ACEDEE /* Test DDR2 memory. */
+
+/* Define maximum downloadable image size */
+#define MAX_IMAGE_SIZE		0xC00000 /* 12 Mbytes */
+
+struct nor_boot_t {
+	uint32_t magicNum;	/* presumably one of the *_MAGIC_* values above - confirm */
+	uint32_t entryPoint;	/* presumably the address execution jumps to - confirm */
+	uint32_t appSize;	/* size of the application image (unit not visible here) */
+	uint32_t ldAddress;	/* Starting RAM address where image is to be copied - XIP Mode */
+};
+
+enum bootmode_t {
+	NON_SECURE_NAND = 0, /* Non-secure NAND mode */
+	NON_SECURE_NOR,      /* Non-secure NOR mode */
+	UNKNOWN_MODE,        /* Unknown mode */
+	NON_SECURE_UART      /* Non-secure UART mode */
+};
+/* Reverse the byte order of a 32-bit value (evaluates its argument 4 times). */
+#define ENDIAN_SWAP(a) ((((uint32_t)(a) & 0xFF) << 24) | (((uint32_t)(a) & 0xFF0000) >> 8) | (((uint32_t)(a) & 0xFF00) << 8) | (((uint32_t)(a) & 0xFF000000) >> 24))
+
+/* Log functions */
+#define log_fail(_x_)  uart_send_str_lf(_x_)
+#define log_info(_x_)  uart_send_str_lf(_x_)
+#define log_debug(_x_) uart_send_str_lf(_x_)
+
+#define host_msg(_x_)  uart_send_str_lf(_x_)
+
+#endif /* _COMMON_H_ */
diff --git a/crc.c b/crc.c
new file mode 100644
index 0000000..58e8ce9
--- /dev/null
+++ b/crc.c
@@ -0,0 +1,78 @@
+/*
+ * crc.c -- CRC routines
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on dv-boot, original copyright follows:
+ *   Copyright (c) 2007 Sergey Kubushin <ksi@koi8.net>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <string.h> /* For size_t */
+
+#define CRC_TABLE_ELEMENTS 256
+#define DAVINCI_CRC_POLY   0x04C11DB7
+
+static uint32_t crc32_table[CRC_TABLE_ELEMENTS];
+
+/* Mirror the low 'num' bits of in_val: bit 0 swaps with bit (num - 1), etc. */
+static uint32_t
+reflect_num(uint32_t in_val, uint32_t num)
+{
+	uint32_t bit;
+	uint32_t reflected = 0x0;
+
+	/* Consume in_val from its LSB, filling the result from the top down */
+	for (bit = num; bit > 0; bit--, in_val >>= 1)
+		reflected |= (uint32_t)((in_val & 0x1) << (bit - 1));
+
+	return reflected;
+}
+
+/* Build a reflected CRC-32 table (for standard CRC-32 algorithm) */
+void
+crc32_dv_build_table(void)
+{
+	uint32_t i, j, crc_accum;
+
+	for (i = 0; i < CRC_TABLE_ELEMENTS; i++) {
+		crc_accum = reflect_num(i, 8) << (32 - 8);
+		for (j = 0; j < 8; j++) {
+			if ((crc_accum & 0x80000000) != 0x00000000)
+				crc_accum = (crc_accum << 1) ^ DAVINCI_CRC_POLY;
+			else
+				crc_accum = (crc_accum << 1);
+		}
+		/* Store once per entry: only the value after all 8 shifts matters */
+		crc32_table[i] = reflect_num(crc_accum, 32);
+	}
+}
+
+/* CRC32 of 'size' bytes at 'data': seed 0xFFFFFFFF, no final inversion */
+uint32_t
+crc32_dv_compute(uint8_t *data, size_t size)
+{
+	uint32_t acc = 0xFFFFFFFF;
+
+	for (; size > 0; size--, data++)
+		acc = (acc >> 8) ^ crc32_table[(uint8_t)(acc ^ *data)];
+
+	return acc;
+}
diff --git a/crc.h b/crc.h
new file mode 100644
index 0000000..ef18563
--- /dev/null
+++ b/crc.h
@@ -0,0 +1,34 @@
+/*
+ * crc.h -- CRC definitions.
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef CRC_H
+#define CRC_H 1
+
+#include <stdint.h>
+#include <string.h> /* For size_t, memcpy, memset */
+
+/* Build a reflected CRC-32 table (for standard CRC-32 algorithm) */
+void crc32_dv_build_table(void);
+
+/* Compute non-standard CRC32 */
+uint32_t
+crc32_dv_compute(uint8_t *data, size_t size);
+
+#endif /* CRC_H */
diff --git a/davinci.c b/davinci.c
new file mode 100644
index 0000000..ee8fae0
--- /dev/null
+++ b/davinci.c
@@ -0,0 +1,528 @@
+/*
+ * davinci.c - common DaVinci platform initialization
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "ddr.h"
+#include "util.h"
+#include "uart.h"
+#include "gpio.h"
+
+extern enum bootmode_t bootmode;
+extern const int8_t lpsc_en_list[];
+extern const int8_t lpsc_emurstie_list[];
+extern const size_t lpsc_en_list_len;
+extern const size_t lpsc_emurstie_list_len;
+
+/* Symbol from linker script */
+extern uint32_t __DDR_START;
+
+static void
+pinmuxControl(uint32_t regOffset, uint32_t mask, uint32_t value)
+{
+	/* One write only: the masked field is never left cleared in between. */
+	SYSTEM->PINMUX[regOffset] = (SYSTEM->PINMUX[regOffset] & ~mask) | (mask & value);
+}
+
+static void
+lpsc_tansition(uint8_t module, uint8_t domain, uint8_t state)
+{
+	const uint32_t domain_bit = 0x00000001 << domain;
+
+	/* Let any transition already in flight on this domain finish */
+	while (PSC->PTSTAT & domain_bit)
+		;
+
+	/* Nothing to do when the module already reports the wanted state */
+	if ((PSC->MDSTAT[module] & 0x1F) != state) {
+		/* Request the new state, then start the domain transition */
+		PSC->MDCTL[module] = (PSC->MDCTL[module] & 0xFFFFFFE0) | state;
+		PSC->PTCMD |= domain_bit;
+
+		/* Block until the domain transition is over ... */
+		while (PSC->PTSTAT & domain_bit)
+			;
+		/* ... and the module status shows the requested state */
+		while ((PSC->MDSTAT[module] & 0x1F) != state)
+			;
+	}
+}
+
+static void
+ivt_init(void)
+{
+	volatile uint32_t *ivect; /* next vector slot to fill */
+	extern uint32_t __IVT;    /* table base; defined outside this file */
+
+	if (bootmode == NON_SECURE_NOR) {
+		ivect = &(__IVT);
+		*ivect++ = 0xEAFFFFFE;  /* Reset @ 0x00 (written in NOR boot mode only) */
+	} else
+		ivect = &(__IVT) + 4; /* NOTE(review): +4 is 4 words (16 bytes), but the next store is labelled 0x04 - confirm */
+
+	/* Every slot gets the same word, presumably an ARM branch-to-self - confirm */
+	*ivect++ = 0xEAFFFFFE;  /* Undefined Address @ 0x04 */
+	*ivect++ = 0xEAFFFFFE;  /* Software Interrupt @0x08 */
+	*ivect++ = 0xEAFFFFFE;  /* Pre-Fetch Abort @ 0x0C */
+	*ivect++ = 0xEAFFFFFE;  /* Data Abort @ 0x10 */
+	*ivect++ = 0xEAFFFFFE;  /* Reserved @ 0x14 */
+	*ivect++ = 0xEAFFFFFE;  /* IRQ @ 0x18 */
+	*ivect   = 0xEAFFFFFE;	/* FIQ @ 0x1C */
+}
+
+static int
+timer0_init(void)
+{
+	TIMER0->TGCR  = 0x00000000; /* Reset timer */
+	TIMER0->TCR   = 0x00000000; /* Disable timer */
+	TIMER0->TIM12 = 0x00000000; /* Reset timer count to zero */
+
+	/* Set timer period (5 seconds timeout) */
+	TIMER0->PRD12 = SYSTEM_CLK_HZ * 5;
+
+	return E_PASS;
+}
+
+void
+timer0_start(void)
+{
+	AINTC->IRQ1   = 0x00000001; /* Clear bit 0 only; '|=' wrote back all set bits */
+	TIMER0->TGCR  = 0x00000000; /* Reset timer */
+	TIMER0->TIM12 = 0x00000000; /* Reset timer count to zero */
+	TIMER0->TCR   = 0x00000040; /* Setup for one-shot mode */
+	TIMER0->TGCR  = 0x00000005; /* Start TIMER12 in 32-bits mode. */
+}
+
+uint32_t
+timer0_status(void)
+{
+	return AINTC->IRQ1 & 0x1;
+}
+
+static int
+uart0_init(void)
+{
+	UART0->PWREMU_MGNT = 0; /* Reset UART TX & RX components */
+	waitloop(100);
+
+	/* Set DLAB bit - allows setting of clock divisors */
+	UART0->LCR |= 0x80;
+
+	/*
+	 * Compute divisor value. Normally, the divisor would simply be:
+	 *   SYSTEM_CLK_HZ / (UART_BCLK_RATIO * baudrate)
+	 * but we need to round that value by adding 0.5.
+	 * Rounding is especially important at high baud rates.
+	 */
+	UART0->DLL = (SYSTEM_CLK_HZ + (UART_BAUDRATE * (UART_BCLK_RATIO / 2))) /
+		(UART_BCLK_RATIO * UART_BAUDRATE);
+	UART0->DLH = 0x00; /* NOTE(review): high divisor byte is always 0 - confirm the divisor fits in DLL */
+
+	UART0->FCR = 0x0007; /* Clear UART TX & RX FIFOs */
+	UART0->MCR = 0x0000; /* RTS & CTS disabled,
+			      * Loopback mode disabled,
+			      * Autoflow disabled
+			      */
+
+	UART0->LCR = 0x0003; /* Clear DLAB bit
+			      * 8-bit words,
+			      * 1 STOP bit generated,
+			      * No Parity, No Stick parity,
+			      * No Break control
+			      */
+
+	/* Enable receiver, transmitter, set to run.  */
+	UART0->PWREMU_MGNT |= 0x6001;
+
+	return E_PASS; /* no failure path: always E_PASS */
+}
+
+static int
+pll_init(volatile struct pll_regs_t *pll, int pll_mult, int plldiv_ratio[5])
+{
+	int k;
+	volatile uint32_t *plldiv_reg[5];
+	int pll_is_powered_up = /* NOTE(review): nonzero when the PLLPWRDN bit is set, so the name looks inverted - confirm */
+		(pll->PLLCTL & DEVICE_PLLCTL_PLLPWRDN_MASK) >> 1;
+
+	plldiv_reg[0] = &pll->PLLDIV1;
+	plldiv_reg[1] = &pll->PLLDIV2;
+	plldiv_reg[2] = &pll->PLLDIV3;
+	plldiv_reg[3] = &pll->PLLDIV4;
+	plldiv_reg[4] = &pll->PLLDIV5;
+
+	/* Set PLL clock input to internal osc. */
+	pll->PLLCTL &= ~(DEVICE_PLLCTL_CLKMODE_MASK);
+
+	/* Set PLL to bypass, then wait for PLL to stabilize */
+	pll->PLLCTL &= ~(DEVICE_PLLCTL_PLLENSRC_MASK |
+			 DEVICE_PLLCTL_PLLEN_MASK);
+	waitloop(150);
+
+	/* Reset PLL: Warning, bit state is inverted for DM644x vs DM35x. */
+#if defined(DM644x)
+	pll->PLLCTL &= ~DEVICE_PLLCTL_PLLRST_MASK;
+#elif defined(DM35x)
+	pll->PLLCTL |= DEVICE_PLLCTL_PLLRST_MASK;
+#endif
+
+	if (pll_is_powered_up) {
+		/* Disable PLL */
+		pll->PLLCTL |= DEVICE_PLLCTL_PLLDIS_MASK;
+
+		/* Powerup PLL */
+		pll->PLLCTL &= ~(DEVICE_PLLCTL_PLLPWRDN_MASK);
+	}
+
+	/* Enable PLL */
+	pll->PLLCTL &= ~(DEVICE_PLLCTL_PLLDIS_MASK);
+
+	/* Wait for PLL to stabilize */
+	waitloop(150);
+
+	/* Load PLL multiplier. */
+	pll->PLLM = (pll_mult - 1) & 0xff;
+
+	/* Set and enable dividers as needed (ratios <= 0 leave a divider untouched). */
+	for (k = 0; k < 5; k++) {
+		if (plldiv_ratio[k] > 0)
+			*(plldiv_reg[k]) |= DEVICE_PLLDIV_EN_MASK | /* NOTE(review): '|=' keeps ratio bits already set - confirm intended */
+				(plldiv_ratio[k] - 1);
+	}
+
+#if defined(DM35x)
+	/* Set the processor AIM wait state and PLL1 post-divider to 1 */
+	SYSTEM->MISC &= ~(DEVICE_MISC_PLL1POSTDIV_MASK |
+			  DEVICE_MISC_AIMWAITST_MASK);
+#endif
+
+	/* Initiate a new divider transition. */
+	pll->PLLCMD |= DEVICE_PLLCMD_GOSET_MASK;
+
+	/* Wait for completion of phase alignment. */
+	while ((pll->PLLSTAT & DEVICE_PLLSTAT_GOSTAT_MASK))
+		;
+
+	/* Wait for PLL to reset ( ~5 usec ) */
+	waitloop(5000);
+
+	/* Release PLL from reset */
+
+	/* Reset PLL: Warning, bit state is inverted for DM644x vs DM35x. */
+#if defined(DM644x)
+	pll->PLLCTL |= DEVICE_PLLCTL_PLLRST_MASK;
+#elif defined(DM35x)
+	pll->PLLCTL &= ~DEVICE_PLLCTL_PLLRST_MASK;
+#endif
+
+	/* Wait for PLL to re-lock:
+	 * DM644x: 2000P
+	 * DM35x:  8000P
+	 */
+	waitloop(8000);
+
+	/* Switch out of BYPASS mode */
+	pll->PLLCTL |= DEVICE_PLLCTL_PLLEN_MASK;
+
+	return E_PASS; /* no failure path: always E_PASS */
+}
+
+static int
+pll1_init(void)
+{
+	int plldiv_ratio[5];
+
+#if defined(DM644x)
+	plldiv_ratio[0] =  1; /* PLLDIV1 fixed */
+	plldiv_ratio[1] =  2; /* PLLDIV2 fixed */
+	plldiv_ratio[2] =  3; /* PLLDIV3 fixed */
+	plldiv_ratio[3] = -1; /* PLLDIV4 not used */
+	plldiv_ratio[4] =  6; /* PLLDIV5 fixed */
+#elif defined(DM35x)
+	plldiv_ratio[0] =  2; /* PLLDIV1 fixed */
+	plldiv_ratio[1] =  4; /* PLLDIV2 fixed */
+
+	/* Calculate PLL divider ratio for divider 3 (feeds VPBE) */
+	plldiv_ratio[2] = 0;
+	while ((plldiv_ratio[2] * VPBE_CLK_HZ) <
+	       (SYSTEM_CLK_HZ * (PLL1_Mult >> 3)))
+		plldiv_ratio[2]++;
+
+	/* Check to make sure we can supply accurate VPBE clock */
+	if ((plldiv_ratio[2] * VPBE_CLK_HZ) !=
+	    (SYSTEM_CLK_HZ * (PLL1_Mult >> 3)))
+		return E_FAIL;
+
+	/* See the device datasheet for more info (must be 2 or 4) */
+	plldiv_ratio[3] =  4;
+	plldiv_ratio[4] = -1; /* PLLDIV5 not used */
+#endif
+
+	return pll_init(PLL1, PLL1_Mult, plldiv_ratio);
+}
+
+static int
+pll2_init(void)
+{
+	int plldiv_ratio[5] = {
+		PLL2_Div1, /* PLLDIV1 */
+		PLL2_Div2, /* PLLDIV2 */
+		-1,        /* PLLDIV3 not used */
+		-1,        /* PLLDIV4 not used */
+		-1,        /* PLLDIV5 not used */
+	};
+
+	return pll_init(PLL2, PLL2_Mult, plldiv_ratio);
+}
+
+static void
+ddr_timing_setup(void)
+{
+	/* The configuration of DDRPHYCR is not dependent on the DDR2 device
+	 * specification but rather on the board layout.
+	 * Setup the read latency and clear DLLPWRDN */
+	DDR->DDRPHYCR = DDRPHYCR_DEFAULT |
+		(DDR_READ_Latency & DDRPHYCR_READLAT_MASK);
+
+	/*
+	 * Set the PR_OLD_COUNT bits in the Bus Burst Priority Register (PBBPR)
+	 * as suggested in TMS320DM6446 errata 2.1.2:
+	 *
+	 * On DM6446 Silicon Revision 2.1 and earlier, under certain conditions
+	 * low priority modules can occupy the bus and prevent high priority
+	 * modules like the VPSS from getting the required DDR2 throughput.
+	 */
+	DDR->PBBPR = DDR_PBBPR_PR_OLD_COUNT;
+
+	/* TIMUNLOCK (unlocked), CAS Latency, number of banks and page size */
+	DDR->SDBCR = SDBCR_DEFAULT |
+		SDBCR_TIMUNLOCK |
+		(DDR_NM << 14)   |
+		(DDR_CL << 9)    |
+		(DDR_IBANK << 4) |
+		(DDR_PAGESIZE << 0);
+
+	/* Program timing registers */
+	DDR->SDTIMR = (DDR_T_RFC << 25) |
+		(DDR_T_RP << 22)  |
+		(DDR_T_RCD << 19) |
+		(DDR_T_WR << 16)  |
+		(DDR_T_RAS << 11) |
+		(DDR_T_RC << 6)   |
+		(DDR_T_RRD << 3)  |
+		(DDR_T_WTR << 0);
+
+	DDR->SDTIMR2 = (DDR_T_XSNR << 16) |
+		(DDR_T_XSRD << 8)  |
+		(DDR_T_RTP << 5)   |
+		(DDR_T_CKE << 0);
+#if defined(DM35x)
+	DDR->SDTIMR2 |= (DDR_T_RASMAX << 27)  |
+		(DDR_T_XP << 25);
+#endif
+
+	/* Clear the TIMUNLOCK bit (locked) */
+	DDR->SDBCR &= ~SDBCR_TIMUNLOCK;
+
+	/* Set the refresh rate */
+	DDR->SDRCR = DDR_RR;
+}
+
+static void
+ddr_reset(void)
+{
+	/* Perform a soft reset to the DDR2 memory controller:
+	 * Put in SYNCRESET and enable it again. */
+	lpsc_tansition(LPSC_DDR2, PD0, PSC_SYNCRESET);
+	lpsc_tansition(LPSC_DDR2, PD0, PSC_ENABLE);
+}
+
+static int
+ddr_init(void)
+{
+	volatile uint32_t *ddr_start = &__DDR_START;
+	/* For reading/writing dummy value in order to apply timing settings */
+	volatile uint32_t ddr_dummy_read;
+
+	/* Enable DDR2 module. */
+	lpsc_tansition(LPSC_DDR2, PD0, PSC_ENABLE);
+
+#if defined(DM35x)
+	ddr_vtp_calibration();
+	ddr_reset();
+#endif
+
+	ddr_timing_setup();
+
+	/* Dummy read to apply timing settings */
+	ddr_dummy_read = ddr_start[0];
+
+#if defined(DM644x)
+	ddr_reset();
+	ddr_vtp_calibration();
+#endif
+
+	/* Verify correct initialization. */
+	ddr_start[0] = DDR_TEST_PATTERN;
+	if (ddr_start[0] != DDR_TEST_PATTERN) {
+		log_fail("DDR init failed");
+		return E_FAIL;
+	}
+
+	return E_PASS;
+}
+
+static void
+psc_init(void)
+{
+	uint32_t i;
+
+#if defined(DM35x)
+	/* Do always on power domain transitions */
+	while ((PSC->PTSTAT) & 0x00000001);
+#elif defined(DM644x)
+	/*
+	 * Workaround for TMS320DM6446 errata 1.3.22
+	 * (Revision(s) Affected: 1.3 and earlier):
+	 *   PSC: PTSTAT Register Does Not Clear After Warm/Maximum Reset.
+	 *   Clear the reserved location at address 0x01C41A20
+	 */
+	PSC_PTSTAT_WORKAROUND_REG = 0;
+
+	/* Put the C64x+ Core into reset (if it's on) */
+	PSC->MDCTL[LPSC_DSP] &= (~0x00000100);
+	PSC->PTCMD |= 0x00000002;
+	while ((PSC->PTSTAT) & (0x00000002));
+	while ((PSC->MDSTAT[LPSC_DSP]) & (0x00000100));
+#endif
+
+	/* Enable selected modules */
+	for (i = 0; i < lpsc_en_list_len; i++) {
+		int8_t k = lpsc_en_list[i];
+
+		PSC->MDCTL[k] = (PSC->MDCTL[k] & 0xFFFFFFE0) | PSC_ENABLE;
+	}
+
+	/* Set EMURSTIE on selected modules */
+	for (i = 0; i < lpsc_emurstie_list_len; i++) {
+		int8_t k = lpsc_emurstie_list[i];
+
+		PSC->MDCTL[k] |= EMURSTIE_MASK;
+	}
+
+	/* Do Always-On Power Domain Transitions */
+	PSC->PTCMD |= 0x00000001;
+	while ((PSC->PTSTAT) & 0x00000001);
+
+#if defined(DM644x)
+	/* DO DSP Power Domain Transitions */
+	PSC->PTCMD |= 0x00000002;
+	while ((PSC->PTSTAT) & (0x00000002));
+#endif
+
+	/* Clear EMURSTIE on selected modules */
+	for (i = 0; i < lpsc_emurstie_list_len; i++) {
+		int8_t k = lpsc_emurstie_list[i];
+
+		PSC->MDCTL[k] &= (~EMURSTIE_MASK);
+	}
+}
+
+int
+davinci_platform_init(char *version)
+{
+	int status = E_PASS; /* holds the code of the first failing step, if any */
+
+	psc_init();
+
+	/* Disable ARM interrupts */
+	AINTC->INTCTL = 0x4;
+	AINTC->EABASE = 0x0;
+	AINTC->EINT0  = 0x0;
+	AINTC->EINT1  = 0x0;
+
+	AINTC->FIQ0 = 0xFFFFFFFF;
+	AINTC->FIQ1 = 0xFFFFFFFF;
+	AINTC->IRQ0 = 0xFFFFFFFF;
+	AINTC->IRQ1 = 0xFFFFFFFF;
+
+#ifdef PINMUX0_DEFAULT
+	pinmuxControl(0, 0xFFFFFFFF, PINMUX0_DEFAULT);
+#endif
+#ifdef PINMUX1_DEFAULT
+	pinmuxControl(1, 0xFFFFFFFF, PINMUX1_DEFAULT);
+#endif
+/* The following are only available on DM35x */
+#ifdef PINMUX2_DEFAULT
+	pinmuxControl(2, 0xFFFFFFFF, PINMUX2_DEFAULT);
+#endif
+#ifdef PINMUX3_DEFAULT
+	pinmuxControl(3, 0xFFFFFFFF, PINMUX3_DEFAULT);
+#endif
+#ifdef PINMUX4_DEFAULT
+	pinmuxControl(4, 0xFFFFFFFF, PINMUX4_DEFAULT);
+#endif
+
+	if (status == E_PASS)
+		status |= pll1_init();
+
+	if (status == E_PASS)
+		status |= uart0_init();
+
+	if (status == E_PASS)
+		status |= timer0_init();
+
+	uart_send_lf();
+	log_info(version); /* sent even when one of the steps above failed */
+
+	if (status == E_PASS)
+		status |= pll2_init();
+
+	if (status == E_PASS)
+		status |= ddr_init();
+
+#ifdef STATUS_LED
+	gpio_direction_out(STATUS_LED, 1);
+#endif /* STATUS_LED */
+
+#ifdef board_minidas
+	gpio_direction_out(FAN, 0);
+	gpio_direction_out(BUZZER, 0);
+
+	/* Put all peripherals in RESET state */
+	gpio_direction_out(DSP1_PWR_ENA, 0);
+	gpio_direction_out(DSP2_PWR_ENA, 0);
+	gpio_direction_out(WIFI_RESETn, 0);
+	gpio_direction_out(GPS_RESETn, 0);
+	gpio_direction_out(CAN_RESETn, 0);
+	gpio_direction_out(ATA_RESETn, 0);
+	gpio_direction_out(CAMERA_RESETn, 0);
+
+	/* Enable power for hard disk */
+	gpio_direction_out(HDD_ENA, 1);
+#endif
+
+	/* IRQ Vector Table Setup */
+	ivt_init();
+
+	return status; /* E_PASS, or the code returned by the first failing step */
+}
diff --git a/davinci.h b/davinci.h
new file mode 100644
index 0000000..2f81146
--- /dev/null
+++ b/davinci.h
@@ -0,0 +1,463 @@
+/*
+ * davinci.h - common DaVinci platform definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _DAVINCI_H_
+#define _DAVINCI_H_
+
+#include "common.h"
+
+#if defined(DM644x)
+#include "dm644x.h"
+#elif defined(DM35x)
+#include "dm35x.h"
+#endif
+
+/* -------------------------------------------------------------------------- *
+ *    System Control Module register structure - See sprue14.pdf, Chapter 10  *
+ *       for more details.                                                    *
+ * -------------------------------------------------------------------------- */ 
+struct sys_module_regs_t { /* offsets below need DM644x or DM35x defined */
+#if defined(DM644x)
+	uint32_t PINMUX[2];         //0x00 - 0x04
+	uint32_t DSPBOOTADDR;       //0x08
+	uint32_t SUSPSRC;           //0x0C
+	uint32_t INTGEN;            //0x10
+#elif defined(DM35x)
+	uint32_t PINMUX[5];         //0x00 - 0x10
+#endif
+	uint32_t BOOTCFG;           //0x14
+	uint32_t ARM_INTMUX;        //0x18 - ONLY ON DM35x
+	uint32_t EDMA_EVTMUX;       //0x1C - ONLY ON DM35x
+	uint32_t DDR_SLEW;          //0x20 - ONLY ON DM35x
+	uint32_t CLKOUT;            //0x24 - ONLY ON DM35x
+	uint32_t DEVICE_ID;         //0x28
+	uint32_t VDAC_CONFIG;       //0x2C - ONLY ON DM35x
+	uint32_t TIMER64_CTL;       //0x30 - ONLY ON DM35x
+	uint32_t USBPHY_CTL;        //0x34
+#if defined(DM644x)
+	uint32_t CHP_SHRTSW;        //0x38
+#elif defined(DM35x)
+	uint32_t MISC;              //0x38
+#endif
+	uint32_t MSTPRI[2];         //0x3C - 0x40
+	uint32_t VPSS_CLKCTL;       //0x44
+#if defined(DM644x)
+	uint32_t VDD3P3V_PWDN;      //0x48
+	uint32_t DDRVTPER;          //0x4C
+	uint32_t RSVD2[8];          //0x50 - 0x6C
+#elif defined(DM35x)
+	uint32_t DEEPSLEEP;         //0x48
+	uint32_t RSVD0;             //0x4C
+	uint32_t DEBOUNCE[8];       //0x50 - 0x6C
+	uint32_t VTPIOCR;           //0x70 - used by ddr_vtp_calibration() on DM35x
+#endif
+};
+
+#define SYSTEM ((volatile struct sys_module_regs_t *) 0x01C40000) /* register overlay */
+
+/* -------------------------------------------------------------------------- *
+ *    ARM Interrupt Controller register structure - See sprue26.pdf for more  *
+ *       details.                                                             *
+ * -------------------------------------------------------------------------- */
+struct aintc_regs_t {
+	uint32_t FIQ0;      /* 0x00 */
+	uint32_t FIQ1;      /* 0x04 */
+	uint32_t IRQ0;      /* 0x08 */
+	uint32_t IRQ1;      /* 0x0C */
+	uint32_t FIQENTRY;  /* 0x10 */
+	uint32_t IRQENTRY;  /* 0x14 */
+	uint32_t EINT0;     /* 0x18 */
+	uint32_t EINT1;     /* 0x1C */
+	uint32_t INTCTL;    /* 0x20 */
+	uint32_t EABASE;    /* 0x24 */
+	uint32_t RSVD0[2];  /* 0x28 - 0x2C */
+	uint32_t INTPRI0;   /* 0x30 */
+	uint32_t INTPRI1;   /* 0x34 */
+	uint32_t INTPRI2;   /* 0x38 */
+	uint32_t INTPRI3;   /* 0x3C */
+	uint32_t INTPRI4;   /* 0x40 */
+	uint32_t INTPRI5;   /* 0x44 */
+	uint32_t INTPRI6;   /* 0x48 */
+	uint32_t INTPRI7;   /* 0x4C */
+};
+
+#define AINTC ((volatile struct aintc_regs_t *) 0x01C48000) /* register overlay */
+
+/* -------------------------------------------------------------------------- *
+ *    PLL Register structure - See sprue14.pdf, Chapter 6 for more details.   *
+ * -------------------------------------------------------------------------- */
+struct pll_regs_t {
+	uint32_t PID;       /* 0x000 */
+	uint32_t RSVD0[56]; /* 0x004 - 0x0E0 */
+	uint32_t RSTYPE;    /* 0x0E4 */
+	uint32_t RSVD1[6];  /* 0x0E8 - 0x0FC */
+	uint32_t PLLCTL;    /* 0x100 */
+	uint32_t RSVD2[3];  /* 0x104 - 0x10C */
+	uint32_t PLLM;      /* 0x110 */
+	uint32_t RSVD3;     /* 0x114 */
+	uint32_t PLLDIV1;   /* 0x118 */
+	uint32_t PLLDIV2;   /* 0x11C */
+	uint32_t PLLDIV3;   /* 0x120 */
+	uint32_t RSVD4;     /* 0x124 */
+	uint32_t POSTDIV;   /* 0x128 */
+	uint32_t BPDIV;     /* 0x12C */
+	uint32_t RSVD5[2];  /* 0x130 - 0x134 */
+	uint32_t PLLCMD;    /* 0x138 */
+	uint32_t PLLSTAT;   /* 0x13C */
+	uint32_t ALNCTL;    /* 0x140 */
+	uint32_t DCHANGE;   /* 0x144 */
+	uint32_t CKEN;      /* 0x148 */
+	uint32_t CKSTAT;    /* 0x14C */
+	uint32_t SYSTAT;    /* 0x150 */
+	uint32_t RSVD6[3];  /* 0x154 - 0x15C */
+	uint32_t PLLDIV4;   /* 0x160 - Only on DM35x */
+	uint32_t PLLDIV5;   /* 0x164 - Only on DM644x */
+};
+
+#define PLL1 ((volatile struct pll_regs_t *) 0x01C40800)
+#define PLL2 ((volatile struct pll_regs_t *) 0x01C40C00)
+
+#define DEVICE_PLLCTL_CLKMODE_MASK  0x00000100 /* bit 8 */
+#define DEVICE_PLLCTL_PLLEN_MASK    0x00000001 /* bit 0 */
+#define DEVICE_PLLCTL_PLLPWRDN_MASK 0x00000002 /* bit 1 */
+#define DEVICE_PLLCTL_PLLRST_MASK   0x00000008 /* bit 3 */
+#define DEVICE_PLLCTL_PLLDIS_MASK   0x00000010 /* bit 4 */
+#define DEVICE_PLLCTL_PLLENSRC_MASK 0x00000020 /* bit 5 */
+
+#define DEVICE_PLLCMD_GOSET_MASK    0x00000001 /* bit 0 */
+#define DEVICE_PLLSTAT_GOSTAT_MASK  0x00000001 /* bit 0 */
+#define DEVICE_PLLDIV_EN_MASK       0x00008000 /* bit 15 */
+#define DEVICE_PLLSTAT_LOCK_MASK    0x00000002 /* bit 1 */
+
+/* -------------------------------------------------------------------------- *
+ *    Power/Sleep Ctrl Register structure - See sprue14.pdf, Chapter 7        * 
+ *       for more details.                                                    *
+ * -------------------------------------------------------------------------- */
+struct psc_regs_t {
+	uint32_t PID;         // 0x000
+	uint32_t RSVD0[3];    // 0x004
+	uint32_t GBLCTL;      // 0x010 - NOT ON DM35x
+	uint32_t RSVD1;       // 0x014
+	uint32_t INTEVAL;     // 0x018
+	uint32_t RSVD2[9];    // 0x01C
+	uint32_t MERRPR0;     // 0x040
+	uint32_t MERRPR1;     // 0x044
+	uint32_t RSVD3[2];    // 0x048
+	uint32_t MERRCR0;     // 0x050
+	uint32_t MERRCR1;     // 0x054
+	uint32_t RSVD4[2];    // 0x058
+	uint32_t PERRPR;      // 0x060
+	uint32_t RSVD5;       // 0x064
+	uint32_t PERRCR;      // 0x068
+	uint32_t RSVD6;       // 0x06C
+	uint32_t EPCPR;       // 0x070
+	uint32_t RSVD7;       // 0x074
+	uint32_t EPCCR;       // 0x078
+	uint32_t RSVD8[33];   // 0x07C
+	uint32_t RAILSTAT;    // 0x100 - NOT ON DM35x
+	uint32_t RAILCTL;     // 0x104 - NOT ON DM35x
+	uint32_t RAILSEL;     // 0x108 - NOT ON DM35x
+	uint32_t RSVD9[5];    // 0x10C
+	uint32_t PTCMD;       // 0x120
+	uint32_t RSVD10;      // 0x124
+	uint32_t PTSTAT;      // 0x128
+	uint32_t RSVD11[53];  // 0x12C
+	uint32_t PDSTAT0;     // 0x200
+	uint32_t PDSTAT1;     // 0x204
+	uint32_t RSVD12[62];  // 0x208
+	uint32_t PDCTL0;      // 0x300
+	uint32_t PDCTL1;      // 0x304
+	uint32_t RSVD13[134]; // 0x308
+	uint32_t MCKOUT0;     // 0x520
+	uint32_t MCKOUT1;     // 0x524
+	uint32_t RSVD14[182]; // 0x528
+	uint32_t MDSTAT[41];  // 0x800 - one entry per LPSC_* id (0..40)
+	uint32_t RSVD15[87];  // 0x8A4
+	uint32_t MDCTL[41];   // 0xA00 - indexed by LPSC_* id (0..40)
+};
+
+#define PSC ((volatile struct psc_regs_t*) 0x01C41000)
+
+#if defined(DM644x)
+/* See TMS320DM6446 errata 1.3.22; address equals PSC->MDCTL[8] (0xA20) */
+#define PSC_PTSTAT_WORKAROUND_REG (*((volatile uint32_t*) 0x01C41A20))
+#endif
+
+#define PD0                 0
+
+/* PSC constants: LPSC_* values are indexes into PSC->MDCTL[] / MDSTAT[] */
+#define LPSC_VPSS_MAST      0
+#define LPSC_VPSS_SLV       1
+#define LPSC_EDMACC         2
+#define LPSC_EDMATC0        3
+#define LPSC_EDMATC1        4
+#if defined(DM644x)
+#define LPSC_EMAC           5
+#define LPSC_EMAC_MEM_CTL   6
+#define LPSC_MDIO           7
+#define LPSC_RESERVED0      8
+#elif defined(DM35x)
+#define LPSC_TIMER3         5
+#define LPSC_SPI1           6
+#define LPSC_MMC_SD1        7
+#define LPSC_ASP1           8
+#endif
+#define LPSC_USB            9
+#if defined(DM644x)
+#define LPSC_ATA            10
+#define LPSC_VLYNQ          11
+#define LPSC_HPI            12
+#elif defined(DM35x)
+#define LPSC_PWM3           10
+#define LPSC_SPI2           11
+#define LPSC_RTO            12
+#endif
+#define LPSC_DDR2           13
+#define LPSC_AEMIF	    14
+#define LPSC_MMC_SD0        15
+#if defined(DM644x)
+#define LPSC_RESERVED1      16
+#elif defined(DM35x)
+#define LPSC_MEMSTK         16
+#endif
+#define LPSC_ASP0           17
+#define LPSC_I2C            18
+#define LPSC_UART0          19
+#if defined(DM35x)
+#define LPSC_UART1          20
+#define LPSC_UART2          21
+#define LPSC_SPIO           22 /* NOTE(review): presumably SPI0 (letter O typed for zero); dm35x.c uses this spelling */
+#define LPSC_PWM0           23
+#define LPSC_PWM1           24
+#define LPSC_PWM2           25
+#endif
+#define LPSC_GPIO           26
+#define LPSC_TIMER0         27
+#define LPSC_TIMER1         28
+#if defined(DM35x)
+#define LPSC_TIMER2         29
+#define LPSC_SYSMOD         30
+#endif
+#define LPSC_ARM            31
+#if defined(DM644x)
+#define LPSC_DSP            39
+#define LPSC_IMCOP          40
+#elif defined(DM35x)
+#define LPSC_VPSS_DAC       40
+#endif
+
+#define EMURSTIE_MASK       0x00000200 /* bit 9; cleared in PSC->MDCTL[] by davinci.c */
+
+#define PSC_ENABLE          0x3
+#define PSC_DISABLE         0x2
+#define PSC_SYNCRESET       0x1
+#define PSC_SWRSTDISABLE    0x0
+
+/* -------------------------------------------------------------------------- *
+ *    DDR2 Memory Ctrl Register structure - See sprue22b.pdf for more details.*
+ * -------------------------------------------------------------------------- */
+struct ddr_mem_ctl_regs_t {
+	uint32_t RSVD0;    /* 0x00 */
+	uint32_t SDRSTAT;  /* 0x04 */
+	uint32_t SDBCR;    /* 0x08 */
+	uint32_t SDRCR;    /* 0x0C */
+	uint32_t SDTIMR;   /* 0x10 */
+	uint32_t SDTIMR2;  /* 0x14 */
+#if defined(DM644x)
+	uint32_t RSVD1[2]; /* 0x18 - 0x1C */
+#elif defined(DM35x)
+	uint32_t RSVD1;    /* 0x18 */
+	uint32_t SDBCR2;   /* 0x1C */
+#endif
+	uint32_t PBBPR; /* 0x20 */
+	uint32_t RSVD2[39]; /* 0x24 - 0xBC */
+	uint32_t IRR;   /* 0xC0 */
+	uint32_t IMR;   /* 0xC4 */
+	uint32_t IMSR;  /* 0xC8 */
+	uint32_t IMCR;  /* 0xCC */
+	uint32_t RSVD3[5]; /* 0xD0 - 0xE0 */
+	uint32_t DDRPHYCR; /* 0xE4 */
+	uint32_t RSVD4[2]; /* 0xE8 - 0xEC */
+#if defined(DM644x)
+	uint32_t VTPIOCR; /* 0xF0 - In system control module for DM35x */
+#endif
+};
+
+#define DDR ((volatile struct ddr_mem_ctl_regs_t *) 0x20000000)
+
+#define DDR_TEST_PATTERN 0xA55AA55A
+
+#define SDBCR_TIMUNLOCK    (1 << 15) /* bit 15 of SDBCR */
+
+#if defined(DM644x)
+
+#define DDRVTPR (*((volatile uint32_t*) 0x01C42030)) /* standalone register, not part of the DDR overlay */
+
+#define DDRPHYCR_DEFAULT      0x50006400 /* Default value with reserved fields */
+#define DDRPHYCR_READLAT_MASK (0x7 << 0) /* 3-bit field on DM644x */
+#define SDBCR_DEFAULT         0x00130000 /* Default value with reserved fields */
+
+#elif defined(DM35x)
+#define DDRPHYCR_DEFAULT      0x28006400 /* Default value with reserved fields */
+#define DDRPHYCR_READLAT_MASK (0xF << 0) /* 4-bit field on DM35x */
+#define SDBCR_DEFAULT         0x00170000 /* Default value with reserved fields */
+#endif
+
+/* -------------------------------------------------------------------------- *
+ *    AEMIF Register structure - See sprue20a.pdf for more details.           *
+ * -------------------------------------------------------------------------- */
+struct emif_regs_t {
+	uint32_t ERCSR;           // 0x00
+	uint32_t AWCCR;           // 0x04
+	uint32_t SDBCR;           // 0x08 - NOT ON DM35x
+	uint32_t SDRCR;           // 0x0C - NOT ON DM35x
+	uint32_t A1CR;            // 0x10
+	uint32_t A2CR;            // 0x14
+	uint32_t A3CR;            // 0x18 - NOT ON DM35x
+	uint32_t A4CR;            // 0x1C - NOT ON DM35x
+	uint32_t SDTIMR;          // 0x20 - NOT ON DM35x
+	uint32_t DDRSR;           // 0x24 - NOT ON DM35x
+	uint32_t DDRPHYCR;        // 0x28 - NOT ON DM35x
+	uint32_t DDRPHYSR;        // 0x2C - NOT ON DM35x
+	uint32_t TOTAR;           // 0x30 - NOT ON DM35x
+	uint32_t TOTACTR;         // 0x34 - NOT ON DM35x
+	uint32_t DDRPHYID_REV;    // 0x38 - NOT ON DM35x
+	uint32_t SDSRETR;         // 0x3C - NOT ON DM35x
+	uint32_t EIRR;            // 0x40
+	uint32_t EIMR;            // 0x44
+	uint32_t EIMSR;           // 0x48
+	uint32_t EIMCR;           // 0x4C
+	uint32_t IOCTRLR;         // 0x50 - NOT ON DM35x
+	uint32_t IOSTATR;         // 0x54 - NOT ON DM35x
+	uint32_t RSVD0;           // 0x58
+	uint32_t ONENANDCTL;      // 0x5C - ONLY ON DM35x
+	uint32_t NANDFCR;         // 0x60
+	uint32_t NANDFSR;         // 0x64
+	uint32_t RSVD1[2];        // 0x68 - 0x6C
+	uint32_t NANDF1ECC;       // 0x70
+	uint32_t NANDF2ECC;       // 0x74
+	uint32_t NANDF3ECC;       // 0x78 - NOT ON DM35x
+	uint32_t NANDF4ECC;       // 0x7C - NOT ON DM35x
+	uint32_t RSVD2;           // 0x80
+	uint32_t IODFTECR;        // 0x84
+	uint32_t IODFTGCR;        // 0x88
+	uint32_t RSVD3;           // 0x8C
+	uint32_t IODFTMRLR;       // 0x90
+	uint32_t IODFTMRMR;       // 0x94
+	uint32_t IODFTMRMSBR;     // 0x98
+	uint32_t RSVD4[5];        // 0x9C - 0xAC
+	uint32_t MODRNR;          // 0xB0
+	uint32_t RSVD5[2];        // 0xB4 - 0xB8
+	uint32_t NAND4BITECCLOAD; // 0xBC - ONLY ON DM35x
+	uint32_t NAND4BITECC1;    // 0xC0 - ONLY ON DM35x
+	uint32_t NAND4BITECC2;    // 0xC4 - ONLY ON DM35x
+	uint32_t NAND4BITECC3;    // 0xC8 - ONLY ON DM35x
+	uint32_t NAND4BITECC4;    // 0xCC - ONLY ON DM35x
+	uint32_t NANDERRADD1;     // 0xD0 - ONLY ON DM35x
+	uint32_t NANDERRADD2;     // 0xD4 - ONLY ON DM35x
+	uint32_t NANDERRVAL1;     // 0xD8 - ONLY ON DM35x
+	uint32_t NANDERRVAL2;     // 0xDC - ONLY ON DM35x
+};
+
+#if defined(DM644x)
+#define AEMIF ((volatile struct emif_regs_t *) 0x01E00000) /* DM644x base */
+#elif defined(DM35x)
+#define AEMIF ((volatile struct emif_regs_t *) 0x01E10000) /* DM35x base */
+#endif
+
+/* -------------------------------------------------------------------------- *
+ *    UART Register structure - See sprue33.pdf for more details.             *
+ * -------------------------------------------------------------------------- */
+struct uart_regs_t { /* every register occupies its own 32-bit slot */
+	uint32_t RBR; /* 0x00 - also reached as THR (see alias below) */
+	uint32_t IER; /* 0x04 */
+	uint32_t IIR; /* 0x08 - also reached as FCR (see alias below) */
+	uint32_t LCR; /* 0x0C */
+	uint32_t MCR; /* 0x10 */
+	uint32_t LSR; /* 0x14 */
+	uint32_t MSR; /* 0x18 - NOT ON DM35x */
+	uint32_t SCR; /* 0x1C - NOT ON DM35x */
+	uint32_t DLL; /* 0x20 */
+	uint32_t DLH; /* 0x24 */
+	uint32_t PID1; /* 0x28 */
+	uint32_t PID2; /* 0x2C */
+	uint32_t PWREMU_MGNT; /* 0x30 */
+};
+
+#define THR RBR /* UART0->THR expands to UART0->RBR (same offset) */
+#define FCR IIR /* UART0->FCR expands to UART0->IIR (same offset) */
+
+#define UART0 ((volatile struct uart_regs_t *) 0x01C20000)
+
+#define UART_BCLK_RATIO	16	/* BCLK is 16 times the baudrate */
+#define UART_BAUDRATE	115200	/* in baud */
+
+/* -------------------------------------------------------------------------- *
+ *    Timer Register structure - See sprue26.pdf for more details.             *
+ * -------------------------------------------------------------------------- */
+struct timer_regs_t { /* 64-bit timer register overlay; offsets from TIMERn base */
+	uint32_t PID12;          // 0x00
+	uint32_t EMUMGT_CLKSPD;  // 0x04
+	uint32_t GPINT_GPEN;     // 0x08 - NOT ON DM35x
+	uint32_t GPTDAT_GPDIR;   // 0x0C - NOT ON DM35x
+	uint32_t TIM12;          // 0x10
+	uint32_t TIM34;          // 0x14
+	uint32_t PRD12;          // 0x18
+	uint32_t PRD34;          // 0x1C
+	uint32_t TCR;            // 0x20
+	uint32_t TGCR;           // 0x24
+	uint32_t WDTCR;          // 0x28
+	uint32_t RSVD1[2];       // 0x2C - 0x30 (two words, so REL12 lands on 0x34)
+	uint32_t REL12;          // 0x34 - ONLY ON DM35x
+	uint32_t REL34;          // 0x38 - ONLY ON DM35x
+	uint32_t CAP12;          // 0x3C - ONLY ON DM35x
+	uint32_t CAP34;          // 0x40 - ONLY ON DM35x
+	uint32_t INTCTL_STAT;    // 0x44 - ONLY ON DM35x
+};
+
+#define TIMER0 ((volatile struct timer_regs_t *) 0x01C21400)
+
+struct gpio_controller { /* offsets below are relative to the struct start */
+	uint32_t dir;         /* +0x00 */
+	uint32_t out_data;    /* +0x04 */
+	uint32_t set_data;    /* +0x08 */
+	uint32_t clr_data;    /* +0x0C */
+	uint32_t in_data;     /* +0x10 */
+	uint32_t set_rising;  /* +0x14 */
+	uint32_t clr_rising;  /* +0x18 */
+	uint32_t set_falling; /* +0x1C */
+	uint32_t clr_falling; /* +0x20 */
+	uint32_t intstat;     /* +0x24 */
+};
+
+#define DAVINCI_GPIO_BASE 0x01C67000
+
+#define GPIOC ((volatile struct gpio_controller *) DAVINCI_GPIO_BASE) /* NOTE(review): first bank may start at base + 0x10 - confirm against gpio.c */
+
+int davinci_platform_init(char *version); /* davinci.c; version is logged over the UART */
+void ddr_vtp_calibration(void); /* SoC specific, e.g. dm35x.c */
+
+void timer0_start(void);
+uint32_t timer0_status(void);
+
+#endif /* _DAVINCI_H_ */
diff --git a/ddr.h b/ddr.h
new file mode 100644
index 0000000..00eae70
--- /dev/null
+++ b/ddr.h
@@ -0,0 +1,364 @@
+/*
+ * ddr.h - DDR devices parameters
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _DDDR_H_
+#define _DDDR_H_
+
+#include <stdint.h>
+
+#if defined(MICRON_MT47H32M16BN_3_171MHZ)
+/* Micron MT47H32M16BN-3 @ 171 MHz settings:
+ * TCK      = 5.85 nS -> 1 / 171MHz
+ * T_REF    = 7.8 uS (varies with commercial vs industrial)
+ * T_RFC    = 105 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 115 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 2;    /* 4 banks. */
+static const uint16_t DDR_RR    = 1336; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 17;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 9;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 1;   /* (T_RRD * DDRCLK) - 1 (4 bank formula) */
+/*
+ * DDR_T_RRD: this table is for a 4 bank part, so the simple formula is used.
+ * 8 bank DDR2/mDDR memories would instead calculate the T_RRD field using:
+ *   [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 8;  /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * = (70 / 7.8) - 1 = 7.97 -> 8
+					  * (field max. is 15, 4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 19;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H32M16BN_3_162MHZ) /* SFFSDR */
+/* Micron MT47H32M16BN-3 @ 162 MHz settings:
+ * TCK      = 6.17 nS -> 1 / 162 MHz
+ * T_REF    = 7.8 uS (varies with commercial vs industrial)
+ * T_RFC    = 105 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 115 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 2;    /* 4 banks. */
+static const uint16_t DDR_RR    = 1265; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 16;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 8;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 1;   /* (T_RRD * DDRCLK) - 1 (4 bank formula) */
+/*
+ * DDR_T_RRD: this table is for a 4 bank part, so the simple formula is used.
+ * 8 bank DDR2/mDDR memories would instead calculate the T_RRD field using:
+ *   [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 8;  /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * = (70 / 7.8) - 1 = 7.97 -> 8
+					  * (field max. is 15, 4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 18;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H64M16BT_37E_171MHZ) /* EVM DM355 */
+/* Micron MT47H64M16BT-37E @ 171 MHz (datasheet timings are not listed here) */
+static const uint8_t  DDR_IBANK = 3;	/* 8 banks. */
+static const uint16_t DDR_RR = 1336;	/* same value as the other 171 MHz table with T_REF = 7.8 uS */
+static const uint8_t  DDR_CL = 3;
+static const uint8_t  DDR_T_RFC = 21;
+static const uint8_t  DDR_T_RP = 2;
+static const uint8_t  DDR_T_RCD = 2;
+static const uint8_t  DDR_T_WR = 2;
+static const uint8_t  DDR_T_RAS = 6;
+static const uint8_t  DDR_T_RC = 9;
+static const uint8_t  DDR_T_RRD = 1;
+static const uint8_t  DDR_T_WTR = 1;
+static const uint8_t  DDR_T_RASMAX = 7;
+static const uint8_t  DDR_T_XP = 2;
+static const uint8_t  DDR_T_XSNR = 23;
+static const uint8_t  DDR_T_XSRD = 199;
+static const uint8_t  DDR_T_RTP = 3;	/* NOTE(review): other tables use 1 - confirm */
+static const uint8_t  DDR_T_CKE = 3;	/* NOTE(review): other tables use 2 - confirm */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H64M16HR_3_162MHZ) /* DAS Commercial */
+/* Micron MT47H64M16HR-3 @ 162 MHz settings:
+ * TCK      = 6.17 nS -> 1 / 162 MHz
+ * T_REF    = 7.8 uS (varies with commercial vs industrial)
+ * T_RFC    = 127.5 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 138 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 3;   /* 8 banks. */
+static const uint16_t DDR_RR    = 1265; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 20;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 8;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 2;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * The DDR_T_RRD formula above is only for 8 bank DDR2/mDDR memories. When
+ * interfacing to memories with less than 8 banks the T_RRD field should be
+ * calculated using: (T_RRD * DDRCLK) - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 8;  /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * = (70 / 7.8) - 1 = 7.97 -> 8
+					  * (field max. is 15, 4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 21;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H64M16HR_3IT_162MHZ) /* DAS industrial */
+/* Micron MT47H64M16HR-3IT @ 162 MHz settings:
+ * TCK      = 6.17 nS -> 1 / 162 MHz
+ * T_REF    = 3.9 uS (varies with commercial vs industrial)
+ * T_RFC    = 127.5 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 138 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 3;   /* 8 banks. */
+static const uint16_t DDR_RR    = 635; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 20;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 8;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 2;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * The DDR_T_RRD formula above is only for 8 bank DDR2/mDDR memories. When
+ * interfacing to memories with less than 8 banks the T_RRD field should be
+ * calculated using: (T_RRD * DDRCLK) - 1.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 15; /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * = (70 / 3.9) - 1, about 17, but
+					  * max. value is 15 (4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 21;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H64M16BT_3_162MHZ)
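+/* Micron MT47H64M16BT-3 settings (selected by MICRON_MT47H64M16BT_3_162MHZ):
+ * TCK      = 5.85 nS -> 1 / 171 MHz (NOTE(review): values match the 171 MHz table, not 162 MHz - confirm)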
+ * T_REF    = 3.9 uS (varies with commercial vs industrial)
+ * T_RFC    = 198 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 208 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 3;   /* 8 banks. */
+static const uint16_t DDR_RR    = 667; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 33;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 9;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 1;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * The DDR_T_RRD formula above is the 8 bank one; with less than 8 banks it
+ * would be (T_RRD * DDRCLK) - 1. NOTE(review): for T_RRD = 10 nS the 8 bank
+ * formula evaluates to more than 1; the value 1 rounds it down - confirm.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 15; /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * = (70 / 3.9) - 1, about 17, but
+					  * max. value is 15 (4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 34;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#elif defined(MICRON_MT47H128M16HG_3IT_171MHZ)
+/* Micron MT47H128M16HG-3IT @ 171 MHz settings:
+ * TCK      = 5.85 nS -> 1 / 171MHz
+ * T_REF    = 3.9 uS (varies with commercial vs industrial)
+ * T_RFC    = 198 nS (varies with capacity)
+ * T_RP     = 15 nS
+ * T_RCD    = 15 nS
+ * T_WR     = 15 nS
+ * T_RAS    = 40 nS
+ * T_RASMAX = 70 uS
+ * T_RTP    = 7.5 nS
+ * T_RC     = 55 nS
+ * T_RRD    = 10 nS
+ * T_WTR    = 7.5 nS
+ * T_XSRD   = 200 nS
+ * T_XSNR   = 208 nS -> T_RFC(MIN) + 10
+ * T_CKE    = 3 TCK
+ * T_XP     = 2 TCK
+ */
+static const uint8_t  DDR_IBANK = 3;   /* 8 banks. */
+static const uint16_t DDR_RR    = 667; /* DDRCLK * T_REF */
+static const uint8_t  DDR_CL    = 3;
+static const uint8_t  DDR_T_RFC = 33;  /* (T_RFC * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RP  = 2;   /* (T_RP  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RCD = 2;   /* (T_RCD * DDRCLK) - 1 */
+static const uint8_t  DDR_T_WR  = 2;   /* (T_WR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RAS = 6;   /* (T_RAS * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RC  = 9;   /* (T_RC  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RRD = 1;   /* [((4 * T_RRD) + (2 * TCK)) / (4 * TCK)] - 1 */
+/*
+ * The DDR_T_RRD formula above is the 8 bank one; with less than 8 banks it
+ * would be (T_RRD * DDRCLK) - 1. NOTE(review): with T_RRD = 10 nS and
+ * TCK = 5.85 nS it evaluates to about 1.2; the value 1 rounds down - confirm.
+ */
+static const uint8_t  DDR_T_WTR    = 1;  /* (T_WTR  * DDRCLK) - 1 */
+static const uint8_t  DDR_T_RASMAX = 15; /*
+					  * (T_RASMAX(uS) / T_REF) - 1
+					  * = (70 / 3.9) - 1, about 17, but
+					  * max. value is 15 (4 bits)
+					  */
+static const uint8_t  DDR_T_XP   = 2;   /*
+					 * If T_XP > T_CKE then
+					 *   T_XP = T_XP - 1
+					 * else
+					 *   T_XP = T_CKE - 1
+					 */
+static const uint8_t  DDR_T_XSNR = 34;  /* (T_XSNR * DDRCLK) - 1 */
+static const uint8_t  DDR_T_XSRD = 199; /* T_XSRD - 1 */
+static const uint8_t  DDR_T_RTP  = 1;   /* (T_RTP * DDRCLK) - 1 */
+static const uint8_t  DDR_T_CKE  = 2;   /* T_CKE - 1 */
+static const uint8_t  DDR_READ_Latency = 4; /* Board specific */
+
+#endif
+
+#endif /* _DDDR_H_ */
diff --git a/dm35x.c b/dm35x.c
new file mode 100644
index 0000000..aa5fea7
--- /dev/null
+++ b/dm35x.c
@@ -0,0 +1,121 @@
+/*
+ * dm35x.c - DM35x specific platform initialization
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "davinci.h"
+#include "util.h"
+
+#define DEVICE_VTPIOCR_PWRDN_MASK       (0x00000040) /* bit 6 */
+#define DEVICE_VTPIOCR_LOCK_MASK        (0x00000080) /* bit 7 */
+#define DEVICE_VTPIOCR_PWRSAVE_MASK     (0x00000100) /* bit 8 */
+#define DEVICE_VTPIOCR_CLR_MASK         (0x00002000) /* bit 13 */
+#define DEVICE_VTPIOCR_VTPIOREADY_MASK  (0x00004000) /* bit 14 */
+#define DEVICE_VTPIOCR_READY_MASK       (0x00008000) /* bit 15 */
+
+/* List of modules to enable in the PSC (DM35x LPSC_* ids, all <= 40) */
+const int8_t lpsc_en_list[] = {
+	LPSC_VPSS_MAST,
+	LPSC_VPSS_SLV,
+	LPSC_EDMACC,
+	LPSC_EDMATC0,
+	LPSC_EDMATC1,
+	LPSC_TIMER3,
+	LPSC_SPI1,
+	LPSC_MMC_SD1,
+	LPSC_ASP1,
+	LPSC_USB,
+	LPSC_PWM3,
+	LPSC_SPI2,
+	LPSC_RTO,
+	LPSC_DDR2,
+	LPSC_AEMIF,
+	LPSC_MMC_SD0,
+	LPSC_MEMSTK,
+	LPSC_ASP0,
+	LPSC_I2C,
+	LPSC_UART0,
+	LPSC_UART1,
+	LPSC_UART2,
+	LPSC_SPIO, /* id 22; spelled with a letter O in davinci.h */
+	LPSC_PWM0,
+	LPSC_PWM1,
+	LPSC_PWM2,
+	LPSC_GPIO,
+	LPSC_TIMER0,
+	LPSC_TIMER1,
+	LPSC_TIMER2,
+	LPSC_SYSMOD,
+	LPSC_ARM,
+	LPSC_VPSS_DAC,
+};
+
+const size_t lpsc_en_list_len = sizeof(lpsc_en_list) /
+	sizeof(lpsc_en_list[0]); /* number of entries in lpsc_en_list */
+
+/* Modules whose EMURSTIE bit davinci.c clears in PSC->MDCTL[] (LPSC_* ids) */
+const int8_t lpsc_emurstie_list[] = {
+	LPSC_VPSS_MAST,
+	LPSC_VPSS_SLV,
+	LPSC_TIMER3,
+	LPSC_SPI1,
+	LPSC_USB,
+	LPSC_PWM3,
+	LPSC_DDR2,
+	LPSC_AEMIF,
+	LPSC_MMC_SD0,
+	LPSC_ASP0,
+	LPSC_GPIO,
+	LPSC_VPSS_DAC,
+};
+
+const size_t lpsc_emurstie_list_len = sizeof(lpsc_emurstie_list) /
+	sizeof(lpsc_emurstie_list[0]); /* loop bound used by davinci.c */
+
+/* DDR2 VTP calibration for DM35x, where VTPIOCR is part of SYSTEM */
+void
+ddr_vtp_calibration(void)
+{
+	volatile uint32_t *const vtpiocr = &SYSTEM->VTPIOCR;
+
+	/* Begin the calibration with CLR, LOCK and PWRDN all cleared */
+	*vtpiocr &= ~(DEVICE_VTPIOCR_CLR_MASK |
+		      DEVICE_VTPIOCR_LOCK_MASK |
+		      DEVICE_VTPIOCR_PWRDN_MASK);
+
+	/* Set CLR again (un-clear the VTP) */
+	*vtpiocr |= DEVICE_VTPIOCR_CLR_MASK;
+
+	/* Busy-wait until the READY bit reads back as set */
+	while ((*vtpiocr & DEVICE_VTPIOCR_READY_MASK) == 0)
+		continue;
+	/* Set the VTP_IO_READY bit */
+	*vtpiocr |= DEVICE_VTPIOCR_VTPIOREADY_MASK;
+
+	/* Lock the impedance and switch on power save mode */
+	*vtpiocr |= (DEVICE_VTPIOCR_LOCK_MASK | DEVICE_VTPIOCR_PWRSAVE_MASK);
+
+	/* The impedance is locked, so the VTP can be powered down */
+	*vtpiocr |= DEVICE_VTPIOCR_PWRDN_MASK;
+
+	/* Give the calibration time to complete */
+	waitloop(150);
+}
diff --git a/dm35x.h b/dm35x.h
new file mode 100644
index 0000000..c214ed0
--- /dev/null
+++ b/dm35x.h
@@ -0,0 +1,64 @@
+/*
+ * dm35x.h - DM35x specific platform definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _DM35X_H_
+#define _DM35X_H_
+
+#include "common.h"
+
+#define SYSTEM_CLK_HZ 24000000
+
+#define VPBE_CLK_HZ   27000000
+
+#define PLL1_Mult 144 /* 216 MHz */
+
+#define PLL2_Mult 114
+#define PLL2_Div1   2 /* Fixed */
+#define PLL2_Div2  -1 /* PLLDIV2 not used */
+
+#define DEVICE_MISC_PLL1POSTDIV_MASK	0x00000002
+#define DEVICE_MISC_AIMWAITST_MASK	0x00000001
+#define DEVICE_MISC_TIMER2WDT_MASK	0x00000010
+
+#define UBL_IMAGE_SIZE 0x7800 /* 30 kB UBL (2 kB reserved for RBL stack) */
+
+/* Global Memory Timing and PLL Settings */
+static const uint8_t  DDR_NM = 1;	/* 16-bit bus width only on DM35x. */
+static const uint8_t  DDR_PAGESIZE = 2;	/* 1024-word page size. */
+
+#define DDR_PBBPR_PR_OLD_COUNT 0x000000FE /* no ';' here: usable in expressions */
+
+/* PINMUX2 register bit values */
+#define PINMUX2_EM_CLK     (1 << 11)
+#define PINMUX2_EM_AVD     (1 << 10)
+#define PINMUX2_EM_WAIT    (1 <<  9)
+#define PINMUX2_EM_WE_OE   (1 <<  8)
+#define PINMUX2_EM_CE1     (1 <<  7)
+#define PINMUX2_EM_CE0     (1 <<  6)
+#define PINMUX2_EM_D7_0    (1 <<  5)
+#define PINMUX2_EM_D15_8   (1 <<  4)
+#define PINMUX2_EM_BA0     (1 <<  2)
+#define PINMUX2_EM_A0_BA1  (1 <<  1)
+#define PINMUX2_EM_A13_3   (1 <<  0)
+
+#endif /* _DM35X_H_ */
diff --git a/dm644x.c b/dm644x.c
new file mode 100644
index 0000000..2d55f62
--- /dev/null
+++ b/dm644x.c
@@ -0,0 +1,113 @@
+/*
+ * dm644x.c - DM644x specific platform initialization
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "davinci.h"
+#include "util.h"
+
+#define VTPIOCR_EN_MASK       (1 << 13)
+#define VTPIOCR_RECAL_MASK    (1 << 15)
+
+#define DDRVTPER_EN_MASK      (1 << 0)
+
+#define DDRVTPR_CAL_DATA_MASK 0x03FF
+
+/* List of LPSC_* modules to enable in the PSC (count: lpsc_en_list_len) */
+const int8_t lpsc_en_list[] = {
+	LPSC_VPSS_MAST,
+	LPSC_VPSS_SLV,
+	LPSC_EDMACC,
+	LPSC_EDMATC0,
+	LPSC_EDMATC1,
+	LPSC_EMAC,
+	LPSC_EMAC_MEM_CTL,
+	LPSC_MDIO,
+	LPSC_USB,
+	LPSC_ATA,
+	LPSC_VLYNQ,
+	LPSC_HPI,
+	LPSC_DDR2,
+	LPSC_AEMIF,
+	LPSC_MMC_SD0,
+	LPSC_ASP0,
+	LPSC_I2C,
+	LPSC_UART0,
+	LPSC_GPIO,
+	LPSC_TIMER0,
+	LPSC_ARM,
+	LPSC_IMCOP,
+};
+
+const size_t lpsc_en_list_len = sizeof(lpsc_en_list) /
+	sizeof(lpsc_en_list[0]); /* number of entries in lpsc_en_list */
+
+/* List of LPSC_* modules for which to control EMURSTIE (count: lpsc_emurstie_list_len) */
+const int8_t lpsc_emurstie_list[] = {
+	LPSC_VPSS_SLV,
+	LPSC_EMAC,
+	LPSC_EMAC_MEM_CTL,
+	LPSC_MDIO,
+	LPSC_USB,
+	LPSC_ATA,
+	LPSC_VLYNQ,
+	LPSC_HPI,
+	LPSC_DDR2,
+	LPSC_AEMIF,
+	LPSC_MMC_SD0,
+	LPSC_ASP0,
+	LPSC_GPIO,
+	LPSC_IMCOP,
+};
+
+const size_t lpsc_emurstie_list_len = sizeof(lpsc_emurstie_list) /
+	sizeof(lpsc_emurstie_list[0]); /* number of entries in lpsc_emurstie_list */
+
+/* DDR2 VTP calibration: run one calibration and copy its result into DDR->VTPIOCR */
+void
+ddr_vtp_calibration(void)
+{
+	int32_t cal_data;
+
+	/* Write 0x1F with the EN bit set; RECAL stays clear (not started) */
+	DDR->VTPIOCR = 0x0000001F | VTPIOCR_EN_MASK;
+
+	/* Start VTP IO calibration */
+	DDR->VTPIOCR |= VTPIOCR_RECAL_MASK;
+
+	/* Fixed delay (no status polling) for calibration to complete */
+	waitloop(11*33);
+
+	/* Enable access to DDRVTPR */
+	SYSTEM->DDRVTPER = DDRVTPER_EN_MASK;
+
+	cal_data = DDRVTPR & DDRVTPR_CAL_DATA_MASK; /* Read calibration data */
+
+	/* Write calibration data to VTPIOCR: clear the field, then OR it in */
+	DDR->VTPIOCR &= ~DDRVTPR_CAL_DATA_MASK;
+	DDR->VTPIOCR |= cal_data;
+
+	/* Disable VTP IO calibration bit */
+	DDR->VTPIOCR &= ~VTPIOCR_EN_MASK;
+
+	/* Disable access to DDRVTPR */
+	SYSTEM->DDRVTPER = 0;
+}
diff --git a/dm644x.h b/dm644x.h
new file mode 100644
index 0000000..511d202
--- /dev/null
+++ b/dm644x.h
@@ -0,0 +1,87 @@
+/*
+ * dm644x.h - DM644x specific platform definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _DM644X_H_
+#define _DM644X_H_
+
+#include "common.h"
+
+#define SYSTEM_CLK_HZ 27000000
+
+#define PLL1_Mult 22 /* DSP=594 MHz ARM=297 MHz */
+
+#define PLL2_Mult 24 /* DDRPHY=324 MHz DDRCLK=162 MHz */
+#define PLL2_Div1 12
+#define PLL2_Div2  2
+
+#define UBL_IMAGE_SIZE 0x3800 /* 14 kB UBL (2 kB reserved for RBL stack) */
+
+/* Global Memory Timing and PLL Settings */
+static const uint8_t  DDR_NM = 0;	/* 32-bit bus width by default. */
+static const uint8_t  DDR_PAGESIZE = 2;	/* 1024-word page size. */
+
+/*
+ * See TMS320DM6446 errata 2.1.2:
+ *   A value of $20 should provide a good ARM (cache enabled)
+ *   performance and still allow good utilization by the VPSS or other
+ *   modules.
+ */
+#define DDR_PBBPR_PR_OLD_COUNT 0x00000020 /* no ';' here: usable in expressions */
+
+/* PINMUX0 register bit values */
+#define PINMUX0_EMACEN   (1u << 31) /* unsigned: 1 << 31 overflows int */
+#define PINMUX0_HPIEN    (1 << 29)
+#define PINMUX0_CFLDEN   (1 << 27)
+#define PINMUX0_CWE      (1 << 26)
+#define PINMUX0_LFLDEN   (1 << 25)
+#define PINMUX0_LOEEN    (1 << 24)
+#define PINMUX0_RGB888   (1 << 23)
+#define PINMUX0_RGB666   (1 << 22)
+#define PINMUX0_ATAEN    (1 << 17)
+#define PINMUX0_HDIREN   (1 << 16)
+#define PINMUX0_VLYNQEN  (1 << 15)
+#define PINMUX0_VLSCREN  (1 << 14)
+#define PINMUX0_VLYNQWD1 (1 << 13)
+#define PINMUX0_VLYNQWD0 (1 << 12)
+
+#define VLYNQ_WIDTH_1 (0 << 12)
+#define VLYNQ_WIDTH_2 (1 << 12)
+#define VLYNQ_WIDTH_3 (3 << 12) /* See TI SPRUE26A document. */
+#define VLYNQ_WIDTH_4 (2 << 12) /* See TI SPRUE26A document. */
+
+/* PINMUX1 register bit values */
+#define PINMUX1_TIMIN (1 << 18)
+#define PINMUX1_CLK1  (1 << 17)
+#define PINMUX1_CLK0  (1 << 16)
+#define PINMUX1_ASP   (1 << 10)
+#define PINMUX1_SPI   (1 <<  8)
+#define PINMUX1_I2C   (1 <<  7)
+#define PINMUX1_PWM2  (1 <<  6)
+#define PINMUX1_PWM1  (1 <<  5)
+#define PINMUX1_PWM0  (1 <<  4)
+#define PINMUX1_U2FLO (1 <<  3)
+#define PINMUX1_UART2 (1 <<  2)
+#define PINMUX1_UART1 (1 <<  1)
+#define PINMUX1_UART0 (1 <<  0)
+
+#endif /* _DM644X_H_ */
diff --git a/gpio.c b/gpio.c
new file mode 100644
index 0000000..e09f97a
--- /dev/null
+++ b/gpio.c
@@ -0,0 +1,91 @@
+/*
+ * gpio.c - GPIO handling
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on davinci gpio code from the Linux kernel, original copyright follows:
+ *   Copyright (c) 2006-2007 David Brownell
+ *   Copyright (c) 2007, MontaVista Software, Inc. <source@mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "gpio.h"
+
+static struct gpio_controller *
+gpio_to_controller(unsigned gpio)
+{
+	/* Map a GPIO number to its register block: every block serves 32
+	 * GPIOs; numbers beyond the fourth block have no controller. */
+	switch (gpio / 32) {
+	case 0:
+		return (void *) DAVINCI_GPIO_BASE + 0x10;
+	case 1:
+		return (void *) DAVINCI_GPIO_BASE + 0x38;
+	case 2:
+		return (void *) DAVINCI_GPIO_BASE + 0x60;
+	case 3:
+		return (void *) DAVINCI_GPIO_BASE + 0x88;
+	default:
+		return NULL; /* gpio >= 128 */
+	}
+}
+
+static inline uint32_t /* bit mask of @gpio inside its 32-bit register */
+gpio_mask(unsigned gpio)
+{
+	return (uint32_t)1 << (gpio % 32); /* unsigned: 1 << 31 overflows int */
+}
+
+int /* 0 on success, -1 if @gpio has no controller */
+gpio_direction_in(unsigned gpio)
+{
+	volatile struct gpio_controller *g = gpio_to_controller(gpio);
+
+	if (!g)
+		return -1; /* gpio out of range: gpio_to_controller() gave NULL */
+	g->dir |= gpio_mask(gpio); /* setting the DIR bit selects input */
+	return 0;
+}
+
+int /* 0 on success, -1 if @gpio has no controller */
+gpio_direction_out(unsigned gpio, int initial_value)
+{
+	volatile struct gpio_controller *g = gpio_to_controller(gpio);
+	uint32_t mask = gpio_mask(gpio);
+
+	if (!g)
+		return -1; /* gpio out of range: gpio_to_controller() gave NULL */
+	if (initial_value)
+		g->set_data = mask; /* latch the level before changing direction */
+	else
+		g->clr_data = mask;
+	g->dir &= ~mask; /* clearing the DIR bit selects output */
+	return 0;
+}
+
+void /* set (@state != 0) or clear (@state == 0) the data bit of @gpio */
+gpio_set(unsigned gpio, int state)
+{
+	volatile struct gpio_controller *g = gpio_to_controller(gpio);
+	if (!g)
+		return; /* gpio out of range: no controller, nothing to write */
+	if (state)
+		g->set_data = gpio_mask(gpio);
+	else
+		g->clr_data = gpio_mask(gpio);
+}
diff --git a/gpio.h b/gpio.h
new file mode 100644
index 0000000..ca941e3
--- /dev/null
+++ b/gpio.h
@@ -0,0 +1,40 @@
+/*
+ * gpio.h - Gpio specific platform definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _GPIO_H_
+#define _GPIO_H_
+
+#include "common.h"
+
+#define GPIO(X) (X) /* 0 <= X <= (DAVINCI_N_GPIO - 1) */
+/* Configure @gpio as an input.  Returns 0 on success. */
+int
+gpio_direction_in(unsigned gpio);
+/* Configure @gpio as an output preset to @initial_value.  Returns 0 on success. */
+int
+gpio_direction_out(unsigned gpio, int initial_value);
+/* Set (@state != 0) or clear (@state == 0) the output data bit of @gpio. */
+void
+gpio_set(unsigned gpio, int state);
+
+#endif /* _GPIO_H_ */
diff --git a/gunzip.c b/gunzip.c
new file mode 100644
index 0000000..b383ca7
--- /dev/null
+++ b/gunzip.c
@@ -0,0 +1,868 @@
+/* gunzip.c - puff implementation
+ *
+ * Copyright (C) 2002-2004 Mark Adler
+ * For conditions of distribution and use, see copyright notice in puff.h
+ * version 1.8, 9 Jan 2004
+ *
+ * puff.c is a simple inflate written to be an unambiguous way to specify the
+ * deflate format.  It is not written for speed but rather simplicity.  As a
+ * side benefit, this code might actually be useful when small code is more
+ * important than speed, such as bootstrap applications.  For typical deflate
+ * data, zlib's inflate() is about four times as fast as puff().  zlib's
+ * inflate compiles to around 20K on my machine, whereas puff.c compiles to
+ * around 4K on my machine (a PowerPC using GNU cc).  If the faster decode()
+ * function here is used, then puff() is only twice as slow as zlib's
+ * inflate().
+ *
+ * All dynamically allocated memory comes from the stack.  The stack required
+ * is less than 2K bytes.  This code is compatible with 16-bit int's and
+ * assumes that long's are at least 32 bits.  puff.c uses the short data type,
+ * assumed to be 16 bits, for arrays in order to conserve memory.  The code
+ * works whether integers are stored big endian or little endian.
+ *
+ * In the comments below are "Format notes" that describe the inflate process
+ * and document some of the less obvious aspects of the format.  This source
+ * code is meant to supplement RFC 1951, which formally describes the deflate
+ * format:
+ *
+ *    http://www.zlib.org/rfc-deflate.html
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0  10 Feb 2002     - First version
+ * 1.1  17 Feb 2002     - Clarifications of some comments and notes
+ *                      - Update puff() dest and source pointers on negative
+ *                        errors to facilitate debugging deflators
+ *                      - Remove longest from struct huffman -- not needed
+ *                      - Simplify offs[] index in construct()
+ *                      - Add input size and checking, using longjmp() to
+ *                        maintain easy readability
+ *                      - Use short data type for large arrays
+ *                      - Use pointers instead of long to specify source and
+ *                        destination sizes to avoid arbitrary 4 GB limits
+ * 1.2  17 Mar 2002     - Add faster version of decode(), doubles speed (!),
+ *                        but leave simple version for readability
+ *                      - Make sure invalid distances detected if pointers
+ *                        are 16 bits
+ *                      - Fix fixed codes table error
+ *                      - Provide a scanning mode for determining size of
+ *                        uncompressed data
+ * 1.3  20 Mar 2002     - Go back to lengths for puff() parameters [Jean-loup]
+ *                      - Add a puff.h file for the interface
+ *                      - Add braces in puff() for else do [Jean-loup]
+ *                      - Use indexes instead of pointers for readability
+ * 1.4  31 Mar 2002     - Simplify construct() code set check
+ *                      - Fix some comments
+ *                      - Add FIXLCODES #define
+ * 1.5   6 Apr 2002     - Minor comment fixes
+ * 1.6   7 Aug 2002     - Minor format changes
+ * 1.7   3 Mar 2003     - Added test code for distribution
+ *                      - Added zlib-like license
+ * 1.8   9 Jan 2004     - Added some comments on no distance codes case
+ * 1.9  20 Feb 2009     - Hugo villeneuve: changed puff function name to gunzip
+ */
+
+#include "uart.h"
+
+#define NIL ((unsigned char *)0)        /* for no output option */
+
+/*
+ * Maximums for allocations and loops.  It is not useful to change these --
+ * they are fixed by the deflate format.
+ */
+#define MAXBITS 15              /* maximum bits in a code */
+#define MAXLCODES 286           /* maximum number of literal/length codes */
+#define MAXDCODES 30            /* maximum number of distance codes */
+#define MAXCODES (MAXLCODES+MAXDCODES)  /* maximum codes lengths to read */
+#define FIXLCODES 288           /* number of fixed literal/length codes */
+
+/* input and output state shared by the block decoders */
+struct state {
+	/* output state */
+	unsigned char *out;         /* output buffer, or NIL to only count bytes */
+	unsigned long outlen;       /* available space at out */
+	unsigned long outcnt;       /* bytes written to out so far */
+
+	/* input state */
+	unsigned char *in;          /* input buffer */
+	unsigned long inlen;        /* available input at in */
+	unsigned long incnt;        /* bytes read so far */
+	int bitbuf;                 /* bits read but not yet used, next bit in LSB */
+	int bitcnt;                 /* number of bits in bit buffer */
+};
+
+static int pufferror; /* set to -1 by bits()/decode() when input runs out */
+
+/*
+ * These tables were moved from the internal stack to DDR (.ddrram section):
+ * they used too much internal stack and corrupted data.
+ */
+short distcnt[MAXBITS+1] __attribute__((section(".ddrram"))); /* distcode memory */
+short distsym[MAXDCODES] __attribute__((section(".ddrram"))); /* distcode memory */
+short lencnt[MAXBITS+1] __attribute__((section(".ddrram"))); /* lencode memory */
+short dyn_lengths[MAXCODES] __attribute__((section(".ddrram"))); /* descriptor code lengths */
+short lensym[FIXLCODES] __attribute__((section(".ddrram"))); /* presumably lencode symbols - confirm at its users */
+
+/*
+ * Return need bits from the input stream.  This always leaves less than
+ * eight bits in the buffer.  bits() works properly for need == 0.
+ *
+ * Format notes:
+ *
+ * - Bits are stored in bytes from the least significant bit to the most
+ *   significant bit.  Therefore bits are dropped from the bottom of the bit
+ *   buffer, using shift right, and new bytes are appended to the top of the
+ *   bit buffer, using shift left.
+ */
+static int
+bits(struct state *s, int need)
+{
+	long acc;           /* accumulator for buffered plus new bits */
+
+	/* start from whatever is still held in the bit buffer */
+	acc = s->bitbuf;
+
+	/* append whole input bytes above the held bits until need is met */
+	for (; s->bitcnt < need; s->bitcnt += 8) {
+		if (s->incnt == s->inlen) {
+			/* input exhausted: flag it through pufferror */
+			pufferror = -1;
+			return 0;
+		}
+
+		acc |= (long)(s->in[s->incnt++]) << s->bitcnt;
+	}
+
+	/* keep the bits beyond need for the next call (zero to seven left) */
+	s->bitcnt -= need;
+	s->bitbuf = (int)(acc >> need);
+
+	/* hand back only the low need bits */
+	acc &= (1L << need) - 1;
+	return (int)acc;
+}
+
+/*
+ * Process a stored block.
+ *
+ * Format notes:
+ *
+ * - After the two-bit stored block type (00), the stored block length and
+ *   stored bytes are byte-aligned for fast copying.  Therefore any leftover
+ *   bits in the byte that has the last bit of the type, as many as seven, are
+ *   discarded.  The value of the discarded bits are not defined and should not
+ *   be checked against any expectation.
+ *
+ * - The second inverted copy of the stored block length does not have to be
+ *   checked, but it's probably a good idea to do so anyway.
+ *
+ * - A stored block can have zero length.  This is sometimes used to byte-align
+ *   subsets of the compressed data for random access or partial recovery.
+ */
+static int
+stored(struct state *s)
+{
+	unsigned nbytes;    /* payload size announced by the block header */
+
+	/* stored data is byte aligned: drop the rest of the current byte */
+	s->bitcnt = 0;
+	s->bitbuf = 0;
+
+	/* header: 16-bit length, low byte first, then its one's complement */
+	if (s->incnt + 4 > s->inlen)
+		return 2; /* header is truncated */
+	nbytes = s->in[s->incnt++];
+	nbytes |= s->in[s->incnt++] << 8;
+	if (s->in[s->incnt++] != (~nbytes & 0xff) ||
+	    s->in[s->incnt++] != ((~nbytes >> 8) & 0xff))
+		return -2; /* length and complement disagree */
+
+	/* the whole payload must be present in the input */
+	if (s->incnt + nbytes > s->inlen)
+		return 2; /* payload is truncated */
+
+	if (s->out == NIL) {
+		/* scanning only: account for the bytes without copying */
+		s->incnt += nbytes;
+		s->outcnt += nbytes;
+		return 0;
+	}
+	if (s->outcnt + nbytes > s->outlen)
+		return 1; /* output buffer too small */
+	for (; nbytes != 0; nbytes--)
+		s->out[s->outcnt++] = s->in[s->incnt++];
+	return 0;
+}
+
+/*
+ * Huffman code decoding tables.  count[1..MAXBITS] is the number of symbols of
+ * each length, which for a canonical code are stepped through in order.
+ * symbol[] are the symbol values in canonical order, where the number of
+ * entries is the sum of the counts in count[].  The decoding process can be
+ * seen in the function decode() below.
+ */
+struct huffman {
+	short *count;       /* number of symbols of each length, index 0..MAXBITS */
+	short *symbol;      /* canonically ordered symbols */
+};
+
+/*
+ * Decode a code from the stream s using huffman table h.  Return the symbol or
+ * a negative value if there is an error.  If all of the lengths are zero, i.e.
+ * an empty code, or if the code is incomplete and an invalid code is received,
+ * then -9 is returned after reading MAXBITS bits.
+ *
+ * Format notes:
+ *
+ * - The codes as stored in the compressed data are bit-reversed relative to
+ *   a simple integer ordering of codes of the same lengths.  Hence below the
+ *   bits are pulled from the compressed data one at a time and used to
+ *   build the code value reversed from what is in the stream in order to
+ *   permit simple integer comparisons for decoding.  A table-based decoding
+ *   scheme (as used in zlib) does not need to do this reversal.
+ *
+ * - The first code for the shortest length is all zeros.  Subsequent codes of
+ *   the same length are simply integer increments of the previous code.  When
+ *   moving up a length, a zero bit is appended to the code.  For a complete
+ *   code, the last code of the longest length will be all ones.
+ *
+ * - Incomplete codes are handled by this decoder, since they are permitted
+ *   in the deflate format.  See the format notes for fixed() and dynamic().
+ */
+#ifdef SLOW
+static int
+decode(struct state *s, struct huffman *h)
+{
+	int len;   /* current number of bits in code */
+	int code;  /* len bits being decoded */
+	int first; /* first code of length len */
+	int count; /* number of codes of length len */
+	int index; /* index of first code of length len in symbol table */
+
+	code = first = index = 0;
+	for (len = 1; len <= MAXBITS; len++) {
+		code |= bits(s, 1); /* get next bit */
+		if (pufferror) /* bits() ran out of input */
+			return -1;
+
+		count = h->count[len];
+		if (code < first + count) /* if length len, return symbol */
+			return h->symbol[index + (code - first)];
+		index += count; /* else update for next length */
+		first += count;
+		first <<= 1;
+		code <<= 1;
+	}
+	return -9; /* ran out of codes */
+}
+
+/*
+ * A faster version of decode() for real applications of this code.   It's not
+ * as readable, but it makes puff() twice as fast.  And it only makes the code
+ * a few percent larger.
+ */
+#else /* !SLOW */
+static int
+decode(struct state *s, struct huffman *h)
+{
+	int len;     /* current number of bits in code */
+	int code;    /* len bits being decoded */
+	int first;   /* first code of length len */
+	int count;   /* number of codes of length len */
+	int index;   /* index of first code of length len in symbol table */
+	int bitbuf;  /* bits from stream */
+	int left;    /* bits left in next or left to process */
+	short *next; /* next number of codes */
+
+	bitbuf = s->bitbuf; /* start with the bits already buffered */
+	left = s->bitcnt;
+	code = first = index = 0;
+	len = 1;
+	next = h->count + 1; /* lengths start at 1, so skip count[0] */
+	while (1) {
+		while (left--) {
+			code |= bitbuf & 1;
+			bitbuf >>= 1;
+			count = *next++;
+			if (code < first + count) {
+				/* if length len, return symbol */
+				s->bitbuf = bitbuf;
+				s->bitcnt = (s->bitcnt - len) & 7; /* whole bytes were loaded: leftover is mod 8 */
+				return h->symbol[index + (code - first)];
+			}
+			index += count; /* else update for next length */
+			first += count;
+			first <<= 1;
+			code <<= 1;
+			len++;
+		}
+		left = (MAXBITS+1) - len; /* code bits that may still follow */
+		if (left == 0)
+			break;
+		if (s->incnt == s->inlen) {
+			pufferror = -1; /* out of input */
+			return -1;
+		}
+
+		bitbuf = s->in[s->incnt++]; /* refill with the next input byte */
+		if (left > 8)
+			left = 8; /* a fresh byte supplies at most eight bits */
+	}
+	return -9; /* ran out of codes */
+}
+#endif /* SLOW */
+
+/*
+ * Given the list of code lengths length[0..n-1] representing a canonical
+ * Huffman code for n symbols, construct the tables required to decode those
+ * codes.  Those tables are the number of codes of each length, and the symbols
+ * sorted by length, retaining their original order within each length.  The
+ * return value is zero for a complete code set, negative for an over-
+ * subscribed code set, and positive for an incomplete code set.  The tables
+ * can be used if the return value is zero or positive, but they cannot be used
+ * if the return value is negative.  If the return value is zero, it is not
+ * possible for decode() using that table to return an error--any stream of
+ * enough bits will resolve to a symbol.  If the return value is positive, then
+ * it is possible for decode() using that table to return an error for received
+ * codes past the end of the incomplete lengths.
+ *
+ * Not used by decode(), but used for error checking, h->count[0] is the number
+ * of the n symbols not in the code.  So n - h->count[0] is the number of
+ * codes.  This is useful for checking for incomplete codes that have more than
+ * one symbol, which is an error in a dynamic block.
+ *
+ * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS
+ * This is assured by the construction of the length arrays in dynamic() and
+ * fixed() and is not verified by construct().
+ *
+ * Format notes:
+ *
+ * - Permitted and expected examples of incomplete codes are one of the fixed
+ *   codes and any code with a single symbol which in deflate is coded as one
+ *   bit instead of zero bits.  See the format notes for fixed() and dynamic().
+ *
+ * - Within a given code length, the symbols are kept in ascending order for
+ *   the code bits definition.
+ */
+static int
+construct(struct huffman *h, short *length, int n)
+{
+	int i;                 /* index into length[] (the symbol value) */
+	int bitlen;            /* code length currently being examined */
+	int unused;            /* codes still unassigned at this length */
+	short slot[MAXBITS+1]; /* next free h->symbol[] slot per length */
+
+	/* histogram of code lengths; count[0] collects the unused symbols */
+	bitlen = 0;
+	while (bitlen <= MAXBITS)
+		h->count[bitlen++] = 0;
+	i = 0;
+	while (i < n)
+		h->count[length[i++]]++; /* lengths trusted to be in range */
+
+	/* every length is zero: report complete, decode() will fail */
+	if (h->count[0] == n)
+		return 0;
+
+	/* each extra bit doubles the code space; subtract the codes used */
+	unused = 1;
+	for (bitlen = 1; bitlen <= MAXBITS; bitlen++) {
+		unused = (unused << 1) - h->count[bitlen];
+		if (unused < 0)
+			return unused; /* over-subscribed set of lengths */
+	}
+
+	/* first slot of each length = running total of shorter lengths */
+	slot[1] = 0;
+	for (bitlen = 2; bitlen <= MAXBITS; bitlen++)
+		slot[bitlen] = slot[bitlen - 1] + h->count[bitlen - 1];
+
+	/* scatter the coded symbols, ascending within each length */
+	for (i = 0; i < n; i++) {
+		if (length[i] == 0)
+			continue;
+		h->symbol[slot[length[i]]++] = i;
+	}
+
+	/* zero: complete code set, positive: incomplete code set */
+	return unused;
+}
+
+/*
+ * Decode literal/length and distance codes until an end-of-block code.
+ *
+ * Format notes:
+ *
+ * - Compressed data that is after the block type if fixed or after the code
+ *   description if dynamic is a combination of literals and length/distance
+ *   pairs terminated by an end-of-block code.  Literals are simply Huffman
+ *   coded bytes.  A length/distance pair is a coded length followed by a
+ *   coded distance to represent a string that occurs earlier in the
+ *   uncompressed data that occurs again at the current location.
+ *
+ * - Literals, lengths, and the end-of-block code are combined into a single
+ *   code of up to 286 symbols.  They are 256 literals (0..255), 29 length
+ *   symbols (257..285), and the end-of-block symbol (256).
+ *
+ * - There are 256 possible lengths (3..258), and so 29 symbols are not enough
+ *   to represent all of those.  Lengths 3..10 and 258 are in fact represented
+ *   by just a length symbol.  Lengths 11..257 are represented as a symbol and
+ *   some number of extra bits that are added as an integer to the base length
+ *   of the length symbol.  The number of extra bits is determined by the base
+ *   length symbol.  These are in the static arrays below, lens[] for the base
+ *   lengths and lext[] for the corresponding number of extra bits.
+ *
+ * - The reason that 258 gets its own symbol is that the longest length is used
+ *   often in highly redundant files.  Note that 258 can also be coded as the
+ *   base value 227 plus the maximum extra value of 31.  While a good deflate
+ *   should never do this, it is not an error, and should be decoded properly.
+ *
+ * - If a length is decoded, including its extra bits if any, then it is
+ *   followed by a distance code.  There are up to 30 distance symbols.  Again
+ *   there are many more possible distances (1..32768), so extra bits are added
+ *   to a base value represented by the symbol.  The distances 1..4 get their
+ *   own symbol, but the rest require extra bits.  The base distances and
+ *   corresponding number of extra bits are below in the static arrays dist[]
+ *   and dext[].
+ *
+ * - Literal bytes are simply written to the output.  A length/distance pair is
+ *   an instruction to copy previously uncompressed bytes to the output.  The
+ *   copy is from distance bytes back in the output stream, copying for length
+ *   bytes.
+ *
+ * - Distances pointing before the beginning of the output data are not
+ *   permitted.
+ *
+ * - Overlapped copies, where the length is greater than the distance, are
+ *   allowed and common.  For example, a distance of one and a length of 258
+ *   simply copies the last byte 258 times.  A distance of four and a length of
+ *   twelve copies the last four bytes three times.  A simple forward copy
+ *   ignoring whether the length is greater than the distance or not implements
+ *   this correctly.  You should not use memcpy() since its behavior is not
+ *   defined for overlapped arrays.  You should not use memmove() or bcopy()
+ *   since though their behavior -is- defined for overlapping arrays, it is
+ *   defined to do the wrong thing in this case.
+ */
+static int
+codes(struct state *s,
+      struct huffman *lencode,
+      struct huffman *distcode)
+{
+	int sym;            /* symbol most recently decoded */
+	int run;            /* number of bytes in a match copy */
+	unsigned back;      /* how far behind the output end a match starts */
+	/* Base match length for length symbols 257-285 */
+	static const short lens[29] = {
+		3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+		35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
+	/* Number of extra length bits for length symbols 257-285 */
+	static const short lext[29] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+		3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
+	/* Base distance for distance symbols 0-29 */
+	static const short dists[30] = {
+		1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+		257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+		8193, 12289, 16385, 24577};
+	/* Number of extra distance bits for distance symbols 0-29 */
+	static const short dext[30] = {
+		0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+		7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+		12, 12, 13, 13};
+
+	/* one literal or one length/distance pair per iteration */
+	for (;;) {
+		sym = decode(s, lencode);
+		if (sym < 0)
+			return sym;     /* decode() failed */
+		if (sym == 256)
+			break;          /* end-of-block symbol */
+		if (sym < 256) {
+			/* literal: the symbol itself is the output byte */
+			if (s->out != NIL) {
+				if (s->outcnt == s->outlen)
+					return 1; /* output buffer is full */
+				s->out[s->outcnt] = sym;
+			}
+			s->outcnt++;
+			continue;
+		}
+
+		/* length symbol: base length plus its extra bits */
+		sym -= 257;
+		if (sym >= 29)
+			return -9;      /* not a valid length symbol */
+		run = lens[sym] + bits(s, lext[sym]);
+		if (pufferror)
+			return -1;
+
+		/* distance symbol: base distance plus its extra bits */
+		sym = decode(s, distcode);
+		if (sym < 0)
+			return sym;     /* decode() failed */
+		back = dists[sym] + bits(s, dext[sym]);
+		if (pufferror)
+			return -1;
+		if (back > s->outcnt)
+			return -10;     /* match starts before the output */
+
+		if (s->out == NIL) {
+			s->outcnt += run; /* scanning only: just count */
+			continue;
+		}
+
+		/* byte-wise forward copy so overlapping matches repeat data */
+		if (s->outcnt + run > s->outlen)
+			return 1;       /* output buffer too small */
+		for (; run != 0; run--, s->outcnt++)
+			s->out[s->outcnt] = s->out[s->outcnt - back];
+	}
+	/* reached the end-of-block symbol */
+	return 0;
+}
+
+/*
+ * Process a fixed codes block.
+ *
+ * Format notes:
+ *
+ * - This block type can be useful for compressing small amounts of data for
+ *   which the size of the code descriptions in a dynamic block exceeds the
+ *   benefit of custom codes for that block.  For fixed codes, no bits are
+ *   spent on code descriptions.  Instead the code lengths for literal/length
+ *   codes and distance codes are fixed.  The specific lengths for each symbol
+ *   can be seen in the "for" loops below.
+ *
+ * - The literal/length code is complete, but has two symbols that are invalid
+ *   and should result in an error if received.  This cannot be implemented
+ *   simply as an incomplete code since those two symbols are in the "middle"
+ *   of the code.  They are eight bits long and the longest literal/length
+ *   code is nine bits.  Therefore the code must be constructed with those
+ *   symbols, and the invalid symbols must be detected after decoding.
+ *
+ * - The fixed distance codes also have two invalid symbols that should result
+ *   in an error if received.  Since all of the distance codes are the same
+ *   length, this can be implemented as an incomplete code.  Then the invalid
+ *   codes are detected while decoding.
+ */
+static int
+fixed(struct state *s)
+{
+	/*
+	 * Both huffman tables live in the file-scope lencnt/lensym and
+	 * distcnt/distsym arrays, which dynamic() also fills in for its own
+	 * codes.  Caching the fixed tables behind a "build once" flag would
+	 * therefore leave stale dynamic tables in place for any fixed block
+	 * that follows a dynamic one, so rebuild them on every call.
+	 *
+	 * NOTE(review): construct() is given FIXLCODES symbols here, so
+	 * lensym is assumed to hold at least FIXLCODES entries -- confirm
+	 * against its definition.
+	 */
+	struct huffman lencode = {lencnt, lensym};
+	struct huffman distcode = {distcnt, distsym};
+	short lengths[FIXLCODES];
+	int symbol;
+
+	/* literal/length table: code lengths fixed by the deflate format */
+	for (symbol = 0; symbol < 144; symbol++)
+		lengths[symbol] = 8;
+	for (; symbol < 256; symbol++)
+		lengths[symbol] = 9;
+	for (; symbol < 280; symbol++)
+		lengths[symbol] = 7;
+	for (; symbol < FIXLCODES; symbol++)
+		lengths[symbol] = 8;
+	construct(&lencode, lengths, FIXLCODES);
+
+	/* distance table: every distance symbol uses a 5-bit code */
+	for (symbol = 0; symbol < MAXDCODES; symbol++)
+		lengths[symbol] = 5;
+	construct(&distcode, lengths, MAXDCODES);
+
+	/* decode data until end-of-block code */
+	return codes(s, &lencode, &distcode);
+}
+
+/*
+ * Process a dynamic codes block.
+ *
+ * Format notes:
+ *
+ * - A dynamic block starts with a description of the literal/length and
+ *   distance codes for that block.  New dynamic blocks allow the compressor to
+ *   rapidly adapt to changing data with new codes optimized for that data.
+ *
+ * - The codes used by the deflate format are "canonical", which means that
+ *   the actual bits of the codes are generated in an unambiguous way simply
+ *   from the number of bits in each code.  Therefore the code descriptions
+ *   are simply a list of code lengths for each symbol.
+ *
+ * - The code lengths are stored in order for the symbols, so lengths are
+ *   provided for each of the literal/length symbols, and for each of the
+ *   distance symbols.
+ *
+ * - If a symbol is not used in the block, this is represented by a zero
+ *   as the code length.  This does not mean a zero-length code, but rather
+ *   that no code should be created for this symbol.  There is no way in the
+ *   deflate format to represent a zero-length code.
+ *
+ * - The maximum number of bits in a code is 15, so the possible lengths for
+ *   any code are 1..15.
+ *
+ * - The fact that a length of zero is not permitted for a code has an
+ *   interesting consequence.  Normally if only one symbol is used for a given
+ *   code, then in fact that code could be represented with zero bits.  However
+ *   in deflate, that code has to be at least one bit.  So for example, if
+ *   only a single distance base symbol appears in a block, then it will be
+ *   represented by a single code of length one, in particular one 0 bit.  This
+ *   is an incomplete code, since if a 1 bit is received, it has no meaning,
+ *   and should result in an error.  So incomplete distance codes of one symbol
+ *   should be permitted, and the receipt of invalid codes should be handled.
+ *
+ * - It is also possible to have a single literal/length code, but that code
+ *   must be the end-of-block code, since every dynamic block has one.  This
+ *   is not the most efficient way to create an empty block (an empty fixed
+ *   block is fewer bits), but it is allowed by the format.  So incomplete
+ *   literal/length codes of one symbol should also be permitted.
+ *
+ * - If there are only literal codes and no lengths, then there are no distance
+ *   codes.  This is represented by one distance code with zero bits.
+ *
+ * - The list of up to 286 length/literal lengths and up to 30 distance lengths
+ *   are themselves compressed using Huffman codes and run-length encoding.  In
+ *   the list of code lengths, a 0 symbol means no code, a 1..15 symbol means
+ *   that length, and the symbols 16, 17, and 18 are run-length instructions.
+ *   Each of 16, 17, and 18 are followed by extra bits to define the length of
+ *   the run.  16 copies the last length 3 to 6 times.  17 represents 3 to 10
+ *   zero lengths, and 18 represents 11 to 138 zero lengths.  Unused symbols
+ *   are common, hence the special coding for zero lengths.
+ *
+ * - The symbols for 0..18 are Huffman coded, and so that code must be
+ *   described first.  This is simply a sequence of up to 19 three-bit values
+ *   representing no code (0) or the code length for that symbol (1..7).
+ *
+ * - A dynamic block starts with three fixed-size counts from which is computed
+ *   the number of literal/length code lengths, the number of distance code
+ *   lengths, and the number of code length code lengths (ok, you come up with
+ *   a better name!) in the code descriptions.  For the literal/length and
+ *   distance codes, lengths after those provided are considered zero, i.e. no
+ *   code.  The code length code lengths are received in a permuted order (see
+ *   the order[] array below) to make a short code length code length list more
+ *   likely.  As it turns out, very short and very long codes are less likely
+ *   to be seen in a dynamic code description, hence what may appear initially
+ *   to be a peculiar ordering.
+ *
+ * - Given the number of literal/length code lengths (nlen) and distance code
+ *   lengths (ndist), then they are treated as one long list of nlen + ndist
+ *   code lengths.  Therefore run-length coding can and often does cross the
+ *   boundary between the two sets of lengths.
+ *
+ * - So to summarize, the code description at the start of a dynamic block is
+ *   three counts for the number of code lengths for the literal/length codes,
+ *   the distance codes, and the code length codes.  This is followed by the
+ *   code length code lengths, three bits each.  This is used to construct the
+ *   code length code which is used to read the remainder of the lengths.  Then
+ *   the literal/length code lengths and distance lengths are read as a single
+ *   set of lengths using the code length codes.  Codes are constructed from
+ *   the resulting two sets of lengths, and then finally you can start
+ *   decoding actual compressed data in the block.
+ *
+ * - For reference, a "typical" size for the code description in a dynamic
+ *   block is around 80 bytes.
+ */
+static int
+dynamic(struct state *s)
+{
+	int nlen, ndist, ncode;  /* number of lengths in descriptor */
+	int index;               /* index of dyn_lengths[] */
+	int err;                 /* construct() return value */
+	struct huffman lencode = {lencnt, lensym};    /* length code */
+	struct huffman distcode = {distcnt, distsym}; /* distance code */
+	static const short order[19] = /* permutation of code length codes */
+		{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14,
+		 1, 15};
+
+	/* get number of lengths in each table, check lengths */
+	nlen = bits(s, 5) + 257;
+	ndist = bits(s, 5) + 1;
+	ncode = bits(s, 4) + 4;
+	if (pufferror)
+		return -1;
+
+	if (nlen > MAXLCODES || ndist > MAXDCODES)
+		return -3;                      /* bad counts */
+
+	/* read code length code lengths (really), missing lengths are zero */
+	for (index = 0; index < ncode; index++) {
+		dyn_lengths[order[index]] = bits(s, 3);
+		if (pufferror)
+			return -1;
+	}
+	for (; index < 19; index++)
+		dyn_lengths[order[index]] = 0;
+
+	/* build huffman table for code lengths codes (use lencode
+	 * temporarily) */
+	err = construct(&lencode, dyn_lengths, 19);
+	if (err != 0)
+		return -4;            /* require complete code set here */
+
+	/* read length/literal and distance code length tables */
+	index = 0;
+	while (index < nlen + ndist) {
+		int symbol;             /* decoded value */
+		int len;                /* last length to repeat */
+
+		/* a failed decode must never be stored as a code length */
+		symbol = decode(s, &lencode);
+		if (pufferror)
+			return -1;
+		if (symbol < 0)
+			return symbol;      /* invalid symbol */
+		if (symbol < 16)            /* length in 0..15 */
+			dyn_lengths[index++] = symbol;
+		else {                      /* repeat instruction */
+			len = 0;            /* assume repeating zeros */
+			if (symbol == 16) { /* repeat last length 3..6 times */
+				if (index == 0)
+					return -5;      /* no last length! */
+				len = dyn_lengths[index - 1]; /* last length */
+				symbol = 3 + bits(s, 2);
+			} else if (symbol == 17) { /* repeat zero 3..10 times */
+				symbol = 3 + bits(s, 3);
+			} else { /* == 18, repeat zero 11..138 times */
+				symbol = 11 + bits(s, 7);
+			}
+			if (pufferror)
+				return -1;
+			if (index + symbol > nlen + ndist)
+				return -6; /* too many lengths! */
+			while (symbol--) /* repeat last or zero symbol times */
+				dyn_lengths[index++] = len;
+		}
+	}
+
+	/* build huffman table for literal/length codes */
+	err = construct(&lencode, dyn_lengths, nlen);
+	if (err < 0 || (err > 0 && nlen - lencode.count[0] != 1))
+		return -7; /* only allow incomplete codes if just one code */
+
+	/* build huffman table for distance codes */
+	err = construct(&distcode, dyn_lengths + nlen, ndist);
+	if (err < 0 || (err > 0 && ndist - distcode.count[0] != 1))
+		return -8; /* only allow incomplete codes if just one code */
+
+	/* decode data until end-of-block code */
+	return codes(s, &lencode, &distcode);
+}
+
+/*
+ * Inflate source to dest.  On return, destlen and sourcelen are updated to the
+ * size of the uncompressed data and the size of the deflate data respectively.
+ * On success, the return value of gunzip() is zero.  If there is an error in the
+ * source data, i.e. it is not in the deflate format, then a negative value is
+ * returned.  If there is not enough input available or there is not enough
+ * output space, then a positive error is returned.  In that case, destlen and
+ * sourcelen are not updated to facilitate retrying from the beginning with the
+ * provision of more input data or more output space.  In the case of invalid
+ * inflate data (a negative error), the dest and source pointers are updated to
+ * facilitate the debugging of deflators.
+ *
+ * gunzip() also has a mode to determine the size of the uncompressed output with
+ * no output written.  For this dest must be (unsigned char *)0.  In this case,
+ * the input value of *destlen is ignored, and on return *destlen is set to the
+ * size of the uncompressed output.
+ *
+ * The return codes are:
+ *
+ *   2:  available inflate data did not terminate
+ *   1:  output space exhausted before completing inflate
+ *   0:  successful inflate
+ *  -1:  invalid block type (type == 3)
+ *  -2:  stored block length did not match one's complement
+ *  -3:  dynamic block code description: too many length or distance codes
+ *  -4:  dynamic block code description: code lengths codes incomplete
+ *  -5:  dynamic block code description: repeat lengths with no first length
+ *  -6:  dynamic block code description: repeat more than specified lengths
+ *  -7:  dynamic block code description: invalid literal/length code lengths
+ *  -8:  dynamic block code description: invalid distance code lengths
+ *  -9:  invalid literal/length or distance code in fixed or dynamic block
+ * -10:  distance is too far back in fixed or dynamic block
+ *
+ * Format notes:
+ *
+ * - Three bits are read for each block to determine the kind of block and
+ *   whether or not it is the last block.  Then the block is decoded and the
+ *   process repeated if it was not the last block.
+ *
+ * - The leftover bits in the last byte of the deflate data after the last
+ *   block (if it was a fixed or dynamic block) are undefined and have no
+ *   expected values to check.
+ */
+int
+gunzip(unsigned char *dest,      /* destination pointer */
+       unsigned long *destlen,   /* amount of output space */
+       unsigned char *source,    /* source data pointer */
+       unsigned long *sourcelen) /* amount of input available */
+{
+	struct state s;             /* input/output state */
+	int is_last, kind;          /* header fields of current block */
+	int rc;                     /* result of the block decoder */
+
+	/* Reset the shared error flag. */
+	pufferror = 0;
+
+	/* Output side: destination, its capacity and bytes produced. */
+	s.out = dest;
+	s.outlen = *destlen;        /* ignored if dest is NIL */
+	s.outcnt = 0;
+
+	/* Input side: source, its size, bytes consumed and bit buffer. */
+	s.in = source;
+	s.inlen = *sourcelen;
+	s.incnt = 0;
+	s.bitbuf = 0;
+	s.bitcnt = 0;
+
+	/* Decode block after block until the final one or an error. */
+	for (;;) {
+		/* 1-bit "last block" flag */
+		is_last = bits(&s, 1);
+		if (pufferror)
+			return 2;
+
+		/* 2-bit block type, 0..3 */
+		kind = bits(&s, 2);
+		if (pufferror)
+			return 2;
+
+		if (kind == 0)
+			rc = stored(&s);
+		else if (kind == 1)
+			rc = fixed(&s);
+		else if (kind == 2)
+			rc = dynamic(&s);
+		else
+			rc = -1;    /* Invalid */
+
+		/* pufferror raised inside a block decoder also reports 2. */
+		if (pufferror)
+			return 2;
+
+		/* Stop on any block error or once the last block is done. */
+		if (rc != 0 || is_last)
+			break;
+	}
+
+	/* Lengths are only written back on success or invalid data; a
+	 * positive result leaves them untouched. */
+	if (rc <= 0) {
+		*destlen = s.outcnt;
+		*sourcelen = s.incnt;
+	}
+
+	return rc;
+}
diff --git a/gunzip.h b/gunzip.h
new file mode 100644
index 0000000..6c4d325
--- /dev/null
+++ b/gunzip.h
@@ -0,0 +1,30 @@
+/* gunzip.h
+  Copyright (C) 2002, 2003 Mark Adler, all rights reserved
+  version 1.7, 3 Mar 2002
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the author be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Mark Adler    madler@alumni.caltech.edu
+ */
+
+#define GUNZIP_COMP_BLOCK_OFFSET 0x0A /* Offset of compressed block when no
+				       * flags are set in the GZIP file. */
+
+int gunzip(unsigned char *dest,       /* destination pointer */
+	   unsigned long *destlen,    /* amount of output space */
+	   unsigned char *source,     /* source data pointer */
+	   unsigned long *sourcelen); /* amount of input available */
diff --git a/nand.c b/nand.c
new file mode 100644
index 0000000..bede077
--- /dev/null
+++ b/nand.c
@@ -0,0 +1,975 @@
+/*
+ * nand.c - NAND flash functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "util.h"
+#include "uart.h"
+#include "nand.h"
+
+/* BUS width defines */
+#define BUS_8BIT    0x01
+#define BUS_16BIT   0x02
+#define BUS_32BIT   0x04
+
+/* NAND flash addresses */
+#define NAND_DATA_OFFSET    0x00
+#define NAND_ALE_OFFSET     0x08
+#define NAND_CLE_OFFSET     0x10
+
+#define NAND_TIMEOUT       20480
+
+/* NAND flash commands */
+#define NAND_LO_PAGE        0x00
+#define NAND_HI_PAGE        0x01
+#define NAND_LOCK           0x2A
+#define NAND_UNLOCK_START   0x23
+#define NAND_UNLOCK_END     0x24
+#define NAND_READ_30H       0x30
+#define NAND_EXTRA_PAGE     0x50
+#define	NAND_RDID           0x90
+#define NAND_RDIDADD        0x00
+#define	NAND_RESET          0xFF
+#define	NAND_PGRM_START     0x80
+#define NAND_PGRM_END       0x10
+#define NAND_RDY            0x40
+#define	NAND_PGM_FAIL       0x01
+#define	NAND_BERASEC1       0x60
+#define	NAND_BERASEC2       0xD0
+#define	NAND_STATUS         0x70
+
+/* Status output */
+#define NAND_NANDFSR_READY	0x01
+#define NAND_STATUS_WRITEREADY 	0xC0
+#define NAND_STATUS_ERROR	0x01
+#define NAND_STATUS_BUSY	0x40
+
+#define UNKNOWN_NAND		0xFF /* Unknown device id */
+
+/* Page size in bytes without the spare bytes (clears the low 8 bits of x) */
+#define NANDFLASH_PAGESIZE(x) (((x) >> 8) << 8)
+
+union flash_data { /* one value, viewed at each supported bus width */
+	uint8_t c;   /* byte view, used for BUS_8BIT accesses */
+	uint16_t w;  /* half-word view, used for BUS_16BIT accesses */
+	uint32_t l;  /* full 32-bit value exchanged with callers */
+};
+
+union flash_ptr { /* one address, typed for each access width */
+	volatile uint8_t *cp;   /* byte-wide access */
+	volatile uint16_t *wp;  /* half-word-wide access */
+	volatile uint32_t *lp;  /* word-wide access */
+};
+
+struct nand_dev_infos_t {
+	uint8_t   id;              /* Device ID */
+	uint16_t  num_blocks;      /* Number of blocks */
+	uint8_t   pages_per_block; /* Number of pages per block */
+	uint16_t  bytes_per_page;  /* Number of bytes per page (with spare) */
+};
+
+struct nand_info_t {
+	uint32_t base_addr;        /* Base address of NAND CS memory space. */
+	int      bus_width;        /* Bus width: BUS_8BIT or BUS_16BIT */
+	int      id;               /* Device ID of the matched table entry. */
+	int      num_blocks;       /* Number of blocks */
+	int      pages_per_block;  /* Number of pages per block */
+	int      bytes_per_page;   /* Number of bytes per page (no spare) */
+	int      num_cab;          /* Number of Column address cycles */
+	int      num_rab;          /* Number of Row address cycles */
+	uint32_t ecc_mask;         /* Mask for ECC register */
+	int      large_page;       /* True if page size >= 2048 bytes */
+	int      ecc_index;        /* ECC position is different for small and
+				    * large page devices. */
+	int      chunk_size;       /* Always read/write in 512 bytes chunk max.
+				    * This will be set based on page size. */
+	int      spare_bytes;      /* Number of spare area bytes per page. */
+	int      blk_addr_shift;   /* Number of bits by which to shift block address */
+	int      page_addr_shift;  /* Number of bits by which to shift page address */
+	int      cs_offset;        /*
+				    * Chip-select offset:
+				    *   0 = CS2 space
+				    *   1 = CS3 space
+				    *   2 = CS4 space
+				    *   3 = CS5 space
+				    */
+};
+
+/* Buffer for storing data read from NAND flash */
+static uint8_t read_buf[MAX_PAGE_SIZE] __attribute__((section(".ddrram")));
+
+/* Symbol from linker script */
+extern uint32_t __NANDFlash;
+
+/* structure for holding details about the NAND device itself */
+static volatile struct nand_info_t nand_info;
+
+/* Table of ROM supported NAND devices */
+static const struct nand_dev_infos_t nand_dev_infos[] = {
+	/* id, num_blocks, pages_per_block, bytes_per_page */
+	{0x6E,   256, 16, 256+8},   /*   1 MB */
+	{0x68,   256, 16, 256+8},   /*   1 MB */
+	{0xEC,   256, 16, 256+8},   /*   1 MB */
+	{0xE8,   256, 16, 256+8},   /*   1 MB */
+	{0xEA,   512, 16, 256+8},   /*   2 MB */
+	{0xE3,   512, 16, 512+16},  /*   4 MB */
+	{0xE5,   512, 16, 512+16},  /*   4 MB */
+	{0xE6,  1024, 16, 512+16},  /*   8 MB */
+
+	{0x39,  1024, 16, 512+16},  /*   8 MB */
+	{0x6B,  1024, 16, 512+16},  /*   8 MB */
+	{0x73,  1024, 32, 512+16},  /*  16 MB */
+	{0x33,  1024, 32, 512+16},  /*  16 MB */
+	{0x75,  2048, 32, 512+16},  /*  32 MB */
+	{0x35,  2048, 32, 512+16},  /*  32 MB */
+
+	{0x43,  1024, 32, 512+16},  /*  16 MB 0x1243 */
+	{0x45,  2048, 32, 512+16},  /*  32 MB 0x1245 */
+	{0x53,  1024, 32, 512+16},  /*  16 MB 0x1253 */
+	{0x55,  2048, 32, 512+16},  /*  32 MB 0x1255 */
+	{0x36,  4096, 32, 512+16},  /*  64 MB */
+	{0x46,  4096, 32, 512+16},  /*  64 MB 0x1346 */
+	{0x56,  4096, 32, 512+16},  /*  64 MB 0x1356 */
+
+	{0x76,  4096, 32, 512+16},  /*  64 MB */
+
+	{0x74,  8192, 32, 512+16},  /* 128 MB 0x1374 */
+	{0x79,  8192, 32, 512+16},  /* 128 MB */
+	{0x71, 16384, 32, 512+16},  /* 256 MB */
+	{0xF1,  1024, 64, 2048+64}, /* 128 MB - Big Block */
+	{0xA1,  1024, 64, 2048+64}, /* 128 MB - Big Block */
+	{0xAA,  2048, 64, 2048+64}, /* 256 MB - Big Block */
+	{0xDA,  2048, 64, 2048+64}, /* 256 MB - Big Block */
+	{0xDC,  4096, 64, 2048+64}, /* 512 MB - Big Block */
+	{0xAC,  4096, 64, 2048+64}, /* 512 MB - Big Block */
+	{0xB1,  1024, 64, 2048+64}, /* 128 MB - Big Block */
+	{0xC1,  1024, 64, 2048+64}, /* 128 MB - Big Block */
+	{0xD3,  4096, 64, 2048+64}, /* 512 MB - Big Block */
+	{0x00,	   0,  0,       0}  /* Indicate end of table */
+};
+
+/* Turn a base address plus byte offset into a volatile byte pointer. */
+static volatile uint8_t *flash_make_addr(uint32_t baseAddr, uint32_t offset)
+{
+	return (volatile uint8_t *) (offset + baseAddr);
+}
+
+/* Write one bus-width unit of data at offset within the NAND space. */
+static void flash_write_data(uint32_t offset, uint32_t data)
+{
+	union flash_data value;
+	volatile union flash_ptr port;
+	int width;
+
+	value.l = data;
+	port.cp = flash_make_addr(nand_info.base_addr, offset);
+	width = nand_info.bus_width;
+
+	/* Any other bus width results in no access at all. */
+	if (width == BUS_8BIT)
+		*port.cp = value.c;
+	else if (width == BUS_16BIT)
+		*port.wp = value.w;
+}
+
+/* Send a command: the value is written at the CLE offset. */
+static void flash_write_cmd(uint32_t cmd)
+{
+	flash_write_data(NAND_CLE_OFFSET, cmd);
+}
+
+/* Send one address cycle: the value is written at the ALE offset. */
+static void flash_write_addr(uint32_t addr)
+{
+	flash_write_data(NAND_ALE_OFFSET, addr);
+}
+
+/* Stream src to the NAND data port; 16-bit mode sends numBytes / 2 words. */
+static void flash_write_bytes(const uint8_t *src, uint32_t numBytes)
+{
+	volatile union flash_ptr port, from;
+	uint32_t left;
+	int width;
+
+	from.cp = (volatile uint8_t *) src;
+	port.cp = flash_make_addr(nand_info.base_addr, NAND_DATA_OFFSET);
+	width = nand_info.bus_width;
+
+	if (width == BUS_8BIT) {
+		for (left = numBytes; left > 0; left--)
+			*port.cp = *from.cp++;
+	} else if (width == BUS_16BIT) {
+		for (left = numBytes >> 1; left > 0; left--)
+			*port.wp = *from.wp++;
+	}
+}
+
+/* Send addr as numAddrBytes address cycles, low byte first. */
+static void flash_write_addr_bytes(uint32_t numAddrBytes, uint32_t addr)
+{
+	uint32_t left, shift = 0;
+	for (left = numAddrBytes; left > 0; left--, shift += 8)
+		flash_write_addr((addr >> shift) & 0xff);
+}
+
+/* Send the row address: block number placed above the page-in-block bits. */
+static void flash_write_row_addr_bytes(uint32_t block, uint32_t page)
+{
+	int pages_shift = nand_info.blk_addr_shift - nand_info.page_addr_shift;
+	uint32_t row = (block << pages_shift) | page;
+
+	flash_write_addr_bytes(nand_info.num_rab, row);
+}
+
+/* Full address phase: zeroed column cycles, then the row cycles. */
+static void flash_write_addr_cycles(uint32_t block, uint32_t page)
+{
+	flash_write_addr_bytes(nand_info.num_cab, 0);
+	flash_write_row_addr_bytes(block, page);
+}
+
+/* Read one bus-width unit from the NAND data port into a cleared word. */
+static uint32_t flash_read_data(void)
+{
+	union flash_data value;
+	volatile union flash_ptr port;
+	int width;
+
+	value.l = 0;    /* bytes not filled by a narrow read stay zero */
+	port.cp = flash_make_addr(nand_info.base_addr, NAND_DATA_OFFSET);
+	width = nand_info.bus_width;
+
+	if (width == BUS_8BIT)
+		value.c = *port.cp;
+	else if (width == BUS_16BIT)
+		value.w = *port.wp;
+
+	return value.l;
+}
+
+/* Fill dest from the NAND data port; 16-bit mode reads numBytes / 2 words. */
+static void flash_read_bytes(uint8_t *dest, uint32_t numBytes)
+{
+	volatile union flash_ptr port, to;
+	uint32_t left;
+	int width;
+
+	to.cp = (volatile uint8_t *) dest;
+	port.cp = flash_make_addr(nand_info.base_addr, NAND_DATA_OFFSET);
+	width = nand_info.bus_width;
+
+	if (width == BUS_8BIT) {
+		for (left = numBytes; left > 0; left--)
+			*to.cp++ = *port.cp;
+	} else if (width == BUS_16BIT) {
+		for (left = numBytes >> 1; left > 0; left--)
+			*to.wp++ = *port.wp;
+	}
+}
+
+/* Poll the NANDFSR ready bit up to timeout times; E_FAIL if never set. */
+static int
+nand_wait_for_ready(uint32_t timeout)
+{
+	volatile uint32_t cnt = timeout;
+	uint32_t ready;
+
+	waitloop(200);
+
+	do {
+		ready = AEMIF->NANDFSR & NAND_NANDFSR_READY;
+		cnt--;
+	} while ((cnt > 0) && !ready);
+
+	/* Test the flag, not cnt: ready on the very last poll is success. */
+	if (!ready) {
+		log_info("NAND busy timeout");
+		return E_FAIL;
+	}
+	return E_PASS;
+}
+
+/* Poll the NAND status register until the device reports ready, then
+ * check the fail bit.  There were some problems reported in DM320 with
+ * the Ready/Busy pin not working with all NANDs, hence this extra check.
+ */
+static int
+nand_wait_for_status(uint32_t timeout)
+{
+	volatile uint32_t cnt;
+	uint32_t status;
+	cnt = timeout;
+
+	/* NAND_STATUS_BUSY has the NAND_RDY value: set once ready. */
+	do {
+		flash_write_cmd(NAND_STATUS);
+		status = flash_read_data();
+		cnt--;
+	} while ((cnt > 0) && !(status & NAND_STATUS_BUSY));
+
+	if (!(status & NAND_STATUS_BUSY)) {
+		log_info("NAND status timeout");
+		return E_FAIL;
+	}
+	/* Only trust the fail bit once the device has reported ready. */
+	return (status & NAND_STATUS_ERROR) ? E_FAIL : E_PASS;
+}
+
+/* Read the current ECC calculation and restart process */
+static uint32_t
+nand_read_ecc(void)
+{
+	/* volatile: callers also call this only to trigger the read */
+	volatile uint32_t *ecc_regs = (volatile uint32_t *) &AEMIF->NANDF1ECC;
+	uint32_t retval;
+
+	/* Read and mask the ECC register of the CSn space the flash is in */
+	retval = ecc_regs[nand_info.cs_offset] & nand_info.ecc_mask;
+
+	waitloop(5);
+
+#ifdef NAND_DEBUG
+	uart_send_str("Value read from ECC register: ");
+	uart_send_hexnum(retval, 8);
+	uart_send_lf();
+#endif
+
+	/* Write appropriate bit to start ECC calculations */
+	AEMIF->NANDFCR |= (1<<(8 + (nand_info.cs_offset)));
+	return retval;
+}
+
+/* Read the device ID and fill nand_info from the matching nand_dev_infos[]
+ * entry.  Returns E_PASS on a match, E_FAIL if the ID is not in the table. */
+static int
+nand_get_details(void)
+{
+	uint32_t deviceID, i, j;
+
+	/* Issue device read ID command. */
+	flash_write_cmd(NAND_RDID);
+	flash_write_addr(NAND_RDIDADD);
+
+	/* Read four ID bytes; only the second one is kept as device ID */
+	j        = flash_read_data() & 0xFF;
+	deviceID = flash_read_data() & 0xFF;
+	j        = flash_read_data() & 0xFF;
+	j        = flash_read_data() & 0xFF;
+
+	uart_send_str("  ID:");
+	uart_send_hexnum(deviceID, 2);
+	if (nand_info.bus_width == BUS_16BIT)
+		uart_send_str(", 16");
+	else
+		uart_send_str(", 8");
+
+	log_info("-bit bus");
+
+	i = 0;
+	while (nand_dev_infos[i].id != 0x00) {
+		if (deviceID == nand_dev_infos[i].id) {
+			nand_info.id = (uint8_t) nand_dev_infos[i].id;
+			nand_info.pages_per_block =
+				nand_dev_infos[i].pages_per_block;
+			nand_info.num_blocks = nand_dev_infos[i].num_blocks;
+			nand_info.bytes_per_page =  NANDFLASH_PAGESIZE(
+				nand_dev_infos[i].bytes_per_page);
+
+			nand_info.spare_bytes = nand_dev_infos[i].bytes_per_page -
+				nand_info.bytes_per_page;
+
+			/* Configure small or large page device. */
+			if (nand_info.bytes_per_page >= 2048) {
+				/* Set the large page flag */
+				nand_info.large_page = true;
+				nand_info.ecc_index = 2;
+				nand_info.chunk_size = 512; /* Limit to 512 bytes */
+			} else {
+				/* Clear the large page flag */
+				nand_info.large_page = false;
+				nand_info.ecc_index = 0;
+				nand_info.chunk_size = nand_info.bytes_per_page;
+			}
+
+			/* Address shifts: j counts the bits of pages_per_block */
+			j = 0;
+			while ((nand_info.pages_per_block >> j) > 1)
+				j++;
+
+			nand_info.blk_addr_shift = j;
+			nand_info.page_addr_shift = (nand_info.large_page) ? 16 : 8;
+
+			nand_info.blk_addr_shift += nand_info.page_addr_shift;
+
+			/* Set number of column address bytes needed */
+			nand_info.num_cab = nand_info.page_addr_shift >> 3;
+
+			j = 0;
+			while ((nand_info.num_blocks >> j) > 1)
+				j++;
+
+			/* Set number of row address bytes needed */
+			if ((nand_info.blk_addr_shift + j) <= 24)
+				nand_info.num_rab = 3 -
+					nand_info.num_cab;
+			else if ((nand_info.blk_addr_shift + j) <= 32)
+				nand_info.num_rab = 4 -
+					nand_info.num_cab;
+			else
+				nand_info.num_rab = 5 -
+					nand_info.num_cab;
+
+			/* Set the ECC bit mask */
+			if (nand_info.bytes_per_page < 512)
+				nand_info.ecc_mask = 0x07FF07FF;
+			else
+				nand_info.ecc_mask = 0x0FFF0FFF;
+
+			/* Report the device geometry */
+			uart_send_str("  Blocks: ");
+			uart_send_hexnum(nand_info.num_blocks, 5);
+			uart_send_str(", Pages/block: ");
+			uart_send_hexnum(nand_info.pages_per_block, 3);
+			uart_send_str(", Bytes per page: ");
+			uart_send_hexnum(nand_info.bytes_per_page, 4);
+			uart_send_lf();
+
+			/* Report additional debug information */
+#ifdef NAND_DEBUG
+			uart_send_str("  Page shift: ");
+			uart_send_hexnum(nand_info.page_addr_shift, 2);
+			uart_send_lf();
+			uart_send_str("  Block shift: ");
+			uart_send_hexnum(nand_info.blk_addr_shift, 2);
+			uart_send_lf();
+			uart_send_str("  Column address bytes: ");
+			uart_send_hexnum(nand_info.num_cab, 2);
+			uart_send_lf();
+			uart_send_str("  Row address bytes: ");
+			uart_send_hexnum(nand_info.num_rab, 2);
+			uart_send_lf();
+			uart_send_str("  ECC mask: ");
+			uart_send_hexnum(nand_info.ecc_mask, 8);
+			uart_send_lf();
+#endif
+
+			return E_PASS;
+		}
+		i++;
+	}
+	/* No match was found for the device ID */
+	return E_FAIL;
+}
+
+/*
+ * Write one spare-area chunk: all 0xFF bytes except for the ECC word, which
+ * goes at the word index the RBL expects (nand_info.ecc_index).
+ */
+static void
+nand_write_spare(uint32_t eccvalue)
+{
+	/* Start from all-ones filler words. */
+	uint32_t spare[4] = {
+		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
+	};
+	uint32_t nbytes;
+
+	spare[nand_info.ecc_index] = eccvalue;
+
+	/* 8 bytes are written for 256-byte pages, 16 otherwise. */
+	nbytes = (nand_info.bytes_per_page == 256) ? 8 : 16;
+	flash_write_bytes((uint8_t *) spare, nbytes);
+}
+
+/*
+ * RBL-expected layout for large page NAND (ex: 2048 bytes/page):
+ *
+ *     DM35x          DM644x
+ * -----------------------------
+ *   512 DATA       2048 DATA
+ *    16 SPARE        64 SPARE
+ *   512 DATA
+ *    16 SPARE
+ *   512 DATA
+ *    16 SPARE
+ *   512 DATA
+ *    16 SPARE
+ *
+ * So for big block NAND devices (bytes per page > 512) on the DM35x, we must
+ * write 512 bytes and write the ECC immediately after that data, and repeat
+ * until all the page is written.
+ */
+
+/*
+ * Generic routine to write a page of data to NAND.
+ *
+ * block, page: location to program; src: page data, consumed in
+ * nand_info.chunk_size pieces.
+ * Returns E_PASS or E_FAIL, as reported by the ready and status waits.
+ */
+static int
+nand_write_page(uint32_t block, uint32_t page, const uint8_t *src)
+{
+	uint32_t ecc[4];    /* One ECC word per 512-byte chunk, 4 at most */
+	uint8_t chunks, n;
+
+	/* Number of 512-byte chunks per page, never less than one. */
+	chunks = (nand_info.bytes_per_page >> 9);
+	if (!chunks)
+		chunks = 1;
+
+	/* Command and address phases of the program operation. */
+	flash_write_cmd(NAND_PGRM_START);
+	flash_write_addr_cycles(block, page);
+
+	/* Value is discarded: this call only (re)starts the ECC engine. */
+	nand_read_ecc();
+
+	for (n = 0; n < chunks; n++) {
+		/* Push one chunk of page data. */
+		flash_write_bytes(src, nand_info.chunk_size);
+
+		/* Collect the hardware ECC for that chunk and format it. */
+		ecc[n] = nand_read_ecc();
+		endian_data(&ecc[n]);
+
+#if defined(DM35x)
+		/* DM35x: spare bytes follow each chunk directly. */
+		nand_write_spare(ecc[n]);
+#endif
+
+		/* Advance to the next chunk of source data. */
+		src += nand_info.chunk_size;
+	}
+
+#if defined(DM644x)
+	/* DM644x: all spare chunks go after the whole page data. */
+	for (n = 0; n < chunks; n++)
+		nand_write_spare(ecc[n]);
+#endif
+
+	/* Write program end command */
+	flash_write_cmd(NAND_PGRM_END);
+
+	/* Wait for the device, then report the status check result. */
+	if (nand_wait_for_ready(NAND_TIMEOUT) == E_PASS)
+		return nand_wait_for_status(NAND_TIMEOUT);
+	return E_FAIL;
+}
+
+/*
+ * Read one spare-area record (8 bytes for 256-byte pages, 16 bytes
+ * otherwise) and return its ECC word after endian_data() formatting.
+ */
+static uint32_t
+nand_read_spare(void)
+{
+	uint32_t spare_words[4], stored_ecc;
+	uint32_t spare_len = (nand_info.bytes_per_page == 256) ? 8 : 16;
+
+	flash_read_bytes((uint8_t *) spare_words, spare_len);
+
+	/* Pick the word at the configured ECC index and format it */
+	stored_ecc = spare_words[nand_info.ecc_index];
+	endian_data(&stored_ecc);
+
+	return stored_ecc;
+}
+
+/*
+ * Read one NAND page at (block, page) into dest and, unless
+ * NAND_BYPASS_READ_PAGE_ECC_CHECK is defined, check each chunk's hardware
+ * ECC against the spare area. E_FAIL: oversized page, not ready, bad ECC.
+ */
+int
+nand_read_page(uint32_t block, uint32_t page, uint8_t *dest)
+{
+	uint32_t hw_ecc[4]; /* One ECC word per 512-byte chunk, 2048 max */
+	uint32_t spare_ecc[4];
+	uint8_t numReads, i;
+
+	numReads = (nand_info.bytes_per_page >> 9); /* Divide by 512 */
+	if (numReads == 0)
+		numReads++;
+
+	/* Refuse pages that would overflow hw_ecc[] and spare_ecc[] */
+	if (numReads > (sizeof(hw_ecc) / sizeof(hw_ecc[0])))
+		return E_FAIL;
+
+	/* Write read command, then the address bytes */
+	flash_write_cmd(NAND_LO_PAGE);
+	flash_write_addr_cycles(block, page);
+
+	/* Additional confirm command for big_block devices */
+	if (nand_info.large_page)
+		flash_write_cmd(NAND_READ_30H);
+
+	/* Wait for data to be available */
+	if (nand_wait_for_ready(NAND_TIMEOUT) != E_PASS)
+		return E_FAIL;
+
+	/* Starting the ECC in the NANDFCR register for CS2(bit no.8) */
+	nand_read_ecc();
+
+	for (i = 0; i < numReads; i++) {
+		/* Read one chunk of data and its hardware computed ECC */
+		flash_read_bytes(dest, nand_info.chunk_size);
+		hw_ecc[i] = nand_read_ecc();
+
+#if defined(DM35x)
+		/* DM35x layout: spare bytes follow each chunk immediately */
+		spare_ecc[i] = nand_read_spare();
+#endif
+
+		dest += nand_info.chunk_size;
+	}
+
+#if defined(DM644x)
+	/* DM644x layout: all spare records come after the whole page data */
+	for (i = 0; i < numReads; i++)
+		spare_ecc[i] = nand_read_spare();
+#endif
+
+#ifndef NAND_BYPASS_READ_PAGE_ECC_CHECK
+	for (i = 0; i < numReads; i++) {
+		/* Verify ECC values */
+		if (hw_ecc[i] != spare_ecc[i]) {
+			log_info("NAND ECC failure:");
+			uart_send_str("HW    = ");
+			uart_send_hexnum(hw_ecc[i], 8);
+			uart_send_lf();
+			uart_send_str("SPARE =");
+			uart_send_hexnum(spare_ecc[i], 8);
+			uart_send_lf();
+			return E_FAIL;
+		}
+	}
+#endif /* NAND_BYPASS_READ_PAGE_ECC_CHECK */
+
+	return nand_wait_for_status(NAND_TIMEOUT); /* final status check */
+}
+
+/* Read the page back into read_buf and compare it with src byte for byte */
+static int
+nand_verify_page(int block, int page, const uint8_t *src)
+{
+	int page_size = nand_info.bytes_per_page;
+	int i, k, dump_end;
+
+	if (nand_read_page(block, page, read_buf) != E_PASS)
+		return E_FAIL;
+
+	for (i = 0; i < page_size; i++) {
+		if (src[i] == read_buf[i])
+			continue;
+		uart_send_str("NAND verify page failed at block ");
+		uart_send_hexnum(block, 4);
+		uart_send_str(", page ");
+		uart_send_hexnum(page, 4);
+		uart_send_str(", offset ");
+		uart_send_hexnum(i, 4);
+		uart_send_lf();
+
+		/* Word-align the dump window and keep it inside the page */
+		k = ((i & ~3) < 8) ? 0 : (i & ~3) - 8;
+		dump_end = ((i + 20) > page_size) ? page_size : (i + 20);
+		for (; k < dump_end; k += 4) {
+			uart_send_str("offset ");
+			uart_send_hexnum(k, 4);
+			uart_send_str(", ram=");
+			uart_send_hexnum(*((const uint32_t *) &src[k]), 8);
+			uart_send_str(", nand=");
+			uart_send_hexnum(*((uint32_t *) &read_buf[k]), 8);
+			uart_send_lf();
+		}
+		return E_FAIL;
+	}
+	return E_PASS;
+}
+
+/*
+ * Unlock blocks startBlkNum .. startBlkNum + blkCnt - 1; E_FAIL if past end.
+ */
+static uint32_t
+nand_unprotect_blocks(uint32_t startBlkNum, uint32_t blkCnt)
+{
+	const uint32_t last_block = startBlkNum + blkCnt - 1;
+
+	uart_send_str("Unprotecting blocks ");
+	uart_send_hexnum(startBlkNum, 4);
+	uart_send_str(" to ");
+	uart_send_hexnum(last_block, 4);
+	uart_send_lf();
+
+	if (!(last_block < nand_info.num_blocks)) {
+		log_fail("Invalid last block");
+		return E_FAIL;
+	}
+
+	/* Unlock commands: first row address, then last row address */
+	flash_write_cmd(NAND_UNLOCK_START);
+	flash_write_row_addr_bytes(startBlkNum, 0);
+	flash_write_cmd(NAND_UNLOCK_END);
+	flash_write_row_addr_bytes(last_block, 0);
+
+	return E_PASS;
+}
+
+/* Lock the NAND again: log the operation, then send the NAND_LOCK command */
+static void
+nand_protect_blocks(void)
+{
+	log_info("Protecting the entire NAND flash");
+	flash_write_cmd(NAND_LOCK);
+}
+
+/*
+ * Erase blkCnt consecutive blocks starting at startBlkNum, one erase
+ * command pair per block. Returns E_FAIL if the range ends outside the
+ * device or if any block fails its ready/status wait.
+ */
+static uint32_t
+nand_erase_blocks(uint32_t startBlkNum, uint32_t blkCnt)
+{
+	const uint32_t last_block = startBlkNum + blkCnt - 1;
+	uint32_t block = startBlkNum;
+	uint32_t remaining = blkCnt;
+
+	if (last_block >= nand_info.num_blocks)
+		return E_FAIL;
+
+	/* Report the range about to be erased */
+	uart_send_str("Erasing blocks ");
+	uart_send_hexnum(startBlkNum, 4);
+	uart_send_str(" to ");
+	uart_send_hexnum(last_block, 4);
+	uart_send_lf();
+
+	while (remaining-- > 0) {
+		/* Erase start, row address bytes only, then erase confirm */
+		flash_write_cmd(NAND_BERASEC1);
+		flash_write_row_addr_bytes(block, 0);
+		flash_write_cmd(NAND_BERASEC2);
+
+		/* Device must become ready and report a passing status */
+		if (nand_wait_for_ready(NAND_TIMEOUT) != E_PASS ||
+		    nand_wait_for_status(NAND_TIMEOUT) != E_PASS)
+			return E_FAIL;
+
+		block++;
+	}
+
+	return E_PASS;
+}
+
+/* Set up the AEMIF for NAND, reset the chip and detect it; E_PASS/E_FAIL */
+int
+nand_init(void)
+{
+	uint32_t width; /* 0 or 1, taken from BOOTCFG bit 5 below */
+	uint32_t *CSRegs; /* AEMIF registers viewed as an array from A1CR */
+
+	log_info("Initializing NAND flash:");
+
+#ifdef NAND_BYPASS_READ_PAGE_ECC_CHECK
+	log_info("  Bypassing ECC checks");
+#endif /* NAND_BYPASS_READ_PAGE_ECC_CHECK */
+
+	/* NAND flash base address is the address of the __NANDFlash symbol */
+	nand_info.base_addr = (uint32_t) &__NANDFlash;
+
+	/* Derive cs_offset from the base address (can be 0 through 3 -
+	 * corresponds with CS2 through CS5) */
+	nand_info.cs_offset = (nand_info.base_addr >> 25) - 1;
+
+	/* width = 0 (8 bit NAND) or 1 (16 bit NAND). The AEMIF CS2 bus width
+	 * is given by BOOTCFG (bit no.5). */
+	width = (((SYSTEM->BOOTCFG) & 0x20) >> 5);
+	nand_info.bus_width = (width)?BUS_16BIT:BUS_8BIT;
+
+	/* Setup AEMIF registers for NAND: index them starting at A1CR */
+	CSRegs = (uint32_t *) &(AEMIF->A1CR);
+
+	/* Set the AxCR reg of this chip select; bit 0 carries the width */
+	CSRegs[nand_info.cs_offset] = 0x3FFFFFFC | width;
+
+	/* NAND enable for CSx: set bit cs_offset of NANDFCR */
+	AEMIF->NANDFCR |= (0x1 << (nand_info.cs_offset));
+	nand_read_ecc(); /* result ignored; NOTE(review): presumably clears a stale ECC - confirm */
+
+	/* Send reset command to NAND */
+	flash_write_cmd(NAND_RESET);
+	/* Give up if the device does not become ready after the reset */
+	if (nand_wait_for_ready(NAND_TIMEOUT) != E_PASS)
+		return E_FAIL;
+	/* The device lookup result is the return value */
+	return nand_get_details();
+}
+
+/*
+ * Program one page, pause with waitloop(200), then read it back and
+ * compare it with src. Any write failure is reported as E_FAIL.
+ */
+static int
+nand_write_verify_page(int block, int page, const uint8_t *src)
+{
+	if (nand_write_page(block, page, src) != E_PASS)
+		return E_FAIL;
+
+	waitloop(200);
+
+	return nand_verify_page(block, page, src);
+}
+
+/*
+ * Write an image to NAND: page 0 holds the descriptor (skipped when magic is
+ * UBL_CMD_FLASH_DATA), followed by size bytes from src rounded up to whole
+ * pages. Updates im_desc; moves to the next block if unprotect/erase fails.
+ */
+int
+nand_write_prog(struct nand_image_descriptor_t *im_desc, const uint8_t *src,
+		size_t size)
+{
+	int num_blocks, max_block_num, page_num;
+	uint32_t count_mask, pages_done = 0;
+
+	im_desc->page_num = 1; /* Always start data in page 1 */
+
+	/* Do some rounding based on data buffer size */
+	im_desc->size_in_pages = 0;
+	while ((im_desc->size_in_pages * nand_info.bytes_per_page) < size)
+		im_desc->size_in_pages++;
+
+	/* Get total number of blocks needed (data pages + header page) */
+	num_blocks = 0;
+	while ((num_blocks * nand_info.pages_per_block) <
+	       (im_desc->size_in_pages + 1))
+		num_blocks++;
+
+	uart_send_str("Needed blocks: ");
+	uart_send_hexnum(num_blocks, 4);
+	uart_send_lf();
+	uart_send_str("Needed pages: ");
+	uart_send_hexnum(im_desc->size_in_pages, 4);
+	uart_send_lf();
+
+	/* Check whether writing UBL or APP (based on destination block) */
+	if (im_desc->block_num == START_UBL_BLOCK_NUM)
+		max_block_num = END_UBL_BLOCK_NUM;
+	else
+		max_block_num = nand_info.num_blocks - 1;
+
+NAND_WRITE_RETRY:
+	if (im_desc->block_num > max_block_num) {
+		log_fail("Block > last block");
+		return E_FAIL;
+	}
+
+	uart_send_str("Trying block ");
+	uart_send_hexnum(im_desc->block_num, 4);
+	uart_send_lf();
+
+	/* Unprotect all needed blocks of the Flash */
+	if (nand_unprotect_blocks(im_desc->block_num, num_blocks) != E_PASS) {
+		im_desc->block_num++;
+		log_info("Unprotect failed");
+		goto NAND_WRITE_RETRY;
+	}
+
+	/* Erase the block where the header goes and the data starts */
+	if (nand_erase_blocks(im_desc->block_num, num_blocks) != E_PASS) {
+		im_desc->block_num++;
+		log_info("Erase failed");
+		goto NAND_WRITE_RETRY;
+	}
+
+#ifdef NAND_DEBUG_WRITE_RAMP
+	{
+		int k;
+
+		/* Useful for debugging NAND ECC and spare bytes errors. */
+		for (k = 0; k < 512; k++)
+			ptr[k] = 0xCAFE0000 | k;
+	}
+#endif
+
+	page_num = 0; /* Start in page 0. */
+
+	if (im_desc->magic != UBL_CMD_FLASH_DATA) {
+		/* Write the header to page 0. */
+		log_info("Writing header");
+		if (nand_write_verify_page(im_desc->block_num, page_num,
+					   (uint8_t *) im_desc) != E_PASS)
+			return E_FAIL;
+
+		/* Set starting page number for next data portion. */
+		page_num = 1;
+	}
+
+	/* count_mask assumes pages_per_block is a power of 2 */
+	count_mask = nand_info.pages_per_block - 1;
+	log_info("Writing data");
+	do {
+		/* Write size_in_pages pages whether or not a header exists */
+		if (nand_write_verify_page(im_desc->block_num,
+					   page_num & count_mask, src)
+		    != E_PASS)
+			return E_FAIL;
+
+		page_num++;
+		src += nand_info.bytes_per_page;
+		if (!(page_num & count_mask))
+			im_desc->block_num++;
+	} while (++pages_done < im_desc->size_in_pages);
+	nand_protect_blocks();
+
+	return E_PASS;
+}
+
+/* Unprotect and erase all num_blocks blocks, then protect the device again */
+int
+nand_erase_all(void)
+{
+	/* Both helpers take (first block, block count), not a last index */
+	if (nand_unprotect_blocks(0, nand_info.num_blocks) != E_PASS)
+		return E_FAIL;
+
+	if (nand_erase_blocks(0, nand_info.num_blocks) != E_PASS)
+		return E_FAIL;
+
+	nand_protect_blocks();
+
+	return E_PASS;
+}
+
+/* Accessor for nand_info.pages_per_block */
+int nand_get_pages_per_block(void)
+{
+	return nand_info.pages_per_block;
+}
+
+/* Accessor for nand_info.bytes_per_page */
+int nand_get_bytes_per_page(void)
+{
+	return nand_info.bytes_per_page;
+}
diff --git a/nand.h b/nand.h
new file mode 100644
index 0000000..e1d3165
--- /dev/null
+++ b/nand.h
@@ -0,0 +1,64 @@
+/*
+ * nand.h - NAND flash definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _NAND_H_
+#define _NAND_H_
+
+#include "common.h"
+#include "davinci.h"
+
+/* Define which blocks are valid for writing UBL and APP data */
+#define START_UBL_BLOCK_NUM     1 /* Destination block that marks a UBL write */
+#define END_UBL_BLOCK_NUM       5 /* Highest block a UBL write may move to */
+#define START_APP_BLOCK_NUM     6 /* First block searched for an APP header */
+
+#define MAX_PAGE_SIZE	        (2048+64) /* Data bytes + spare area */
+
+/* NAND descriptor expected by RBL when it loads UBL image. */
+struct nand_image_descriptor_t {
+	uint32_t magic; /* Checked against the magic number mask on load */
+	uint32_t entry_point; /* Handed back as the jump entry point */
+	uint32_t size_in_pages; /* Image data size, rounded up to pages */
+	uint32_t block_num; /* Block where reading of the data starts */
+	uint32_t page_num; /* Page where reading of the data starts */
+	uint32_t load_address; /* Not used by RBL; nand_copy() loads there */
+};
+
+/* Set up the NAND interface and detect the device; E_PASS or E_FAIL */
+int nand_init(void);
+/* Erase the NAND flash blocks and protect the device again */
+int nand_erase_all(void);
+/* Read one page into dest, checking ECC unless bypassed at build time */
+int nand_read_page(uint32_t block, uint32_t page, uint8_t *dest);
+/* Write a header page (unless raw data) followed by size bytes from src */
+int nand_write_prog(struct nand_image_descriptor_t *im_desc,
+		    const uint8_t *src, size_t size);
+
+/* Copy Application from NAND to RAM */
+int nand_copy(uint32_t *jump_entry_point);
+/* Accessors for the page/block geometry stored by the NAND driver */
+int nand_get_pages_per_block(void);
+
+int nand_get_bytes_per_page(void);
+
+#endif /* _NAND_H_ */
diff --git a/nandboot.c b/nandboot.c
new file mode 100644
index 0000000..d78db42
--- /dev/null
+++ b/nandboot.c
@@ -0,0 +1,125 @@
+/*
+ * nandboot.c - NAND boot mode functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "nand.h"
+#include "util.h"
+#include "uart.h"
+
+static uint8_t nand_header[MAX_PAGE_SIZE] __attribute__((section(".ddrram")));
+
+/*
+ * Find the application header in NAND, copy the image to its load address
+ * and store its entry point in *jump_entry_point. Returns E_PASS, or E_FAIL
+ * when no block in the search window holds a valid header.
+ */
+int
+nand_copy(uint32_t *jump_entry_point)
+{
+	uint32_t count, start_block, i;
+	uint32_t magicNum;
+	uint32_t block, page;
+	uint32_t readError = E_FAIL;
+	int failedOnceAlready = false;
+	uint8_t *rxBuf; /* RAM destination of the image (its load address) */
+	struct nand_image_descriptor_t im_desc;
+	int last_header_block;
+
+	start_block = START_APP_BLOCK_NUM;
+	last_header_block = start_block + 10;
+
+NAND_startAgain:
+	/* Read header about application starting at START_APP_BLOCK_NUM, Page 0
+	 * and try 10 blocks. */
+	for (count = start_block; count <= last_header_block; count++) {
+		if (nand_read_page(count, 0, nand_header) != E_PASS)
+			continue;
+
+		magicNum = *((uint32_t *) nand_header);
+
+		/* Valid magic number found */
+		if ((magicNum & MAGIC_NUMBER_MASK) == MAGIC_NUMBER_VALID) {
+			start_block = count;
+			break;
+		}
+	}
+
+	/* Never found valid header. */
+	if (count > last_header_block)
+		return E_FAIL;
+
+	memcpy((void *) &im_desc, nand_header, sizeof(im_desc));
+
+	uart_send_str("Image infos: Magic = ");
+	uart_send_hexnum(im_desc.magic, 8);
+	uart_send_str(", Entry = ");
+	uart_send_hexnum(im_desc.entry_point, 8);
+	uart_send_str(", Pages = ");
+	uart_send_hexnum(im_desc.size_in_pages, 8);
+	uart_send_str(", Load = ");
+	uart_send_hexnum(im_desc.load_address, 8);
+	uart_send_lf();
+
+	rxBuf = (uint8_t *) im_desc.load_address;
+
+NAND_retry:
+	/* initialize block and page number to be used for read */
+	block = im_desc.block_num;
+	page = im_desc.page_num;
+
+	/* Perform the actual copying of the application from NAND to RAM */
+	for (i = 0; i < im_desc.size_in_pages; i++) {
+		/* if page goes beyond max number of pages increment block
+		 * number and reset page number */
+		if (page >= nand_get_pages_per_block()) {
+			page = 0;
+			block++;
+		}
+
+		/* Copy the data */
+		readError =
+			nand_read_page(block, page++,
+				       &rxBuf[i * nand_get_bytes_per_page()]);
+
+		/*
+		 * We attempt to read the app data twice. If we fail twice then
+		 * we go look for a new application header in the NAND flash at
+		 * the next block, which again gets its own two attempts.
+		 */
+		if (readError != E_PASS) {
+			if (failedOnceAlready) {
+				failedOnceAlready = false;
+				start_block++;
+				goto NAND_startAgain;
+			} else {
+				failedOnceAlready = true;
+				goto NAND_retry;
+			}
+		}
+	}
+
+	/* Application was read correctly, so set entrypoint */
+	*jump_entry_point = im_desc.entry_point;
+
+	return E_PASS;
+}
diff --git a/nor.c b/nor.c
new file mode 100644
index 0000000..6ff7936
--- /dev/null
+++ b/nor.c
@@ -0,0 +1,1187 @@
+/*
+ * nor.c - NOR flash functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "util.h"
+#include "uart.h"
+#include "nor.h"
+
+/* Single-bit masks: BITn has only bit n set */
+#define BIT0    0x00000001
+#define BIT1    0x00000002
+#define BIT2    0x00000004
+#define BIT3    0x00000008
+#define BIT4    0x00000010
+#define BIT5    0x00000020
+#define BIT6    0x00000040
+#define BIT7    0x00000080
+#define BIT8    0x00000100
+#define BIT9    0x00000200
+#define BIT10   0x00000400
+#define BIT11   0x00000800
+#define BIT12   0x00001000
+#define BIT13   0x00002000
+#define BIT14   0x00004000
+#define BIT15   0x00008000
+#define BIT16   0x00010000
+#define BIT17   0x00020000
+#define BIT18   0x00040000
+#define BIT19   0x00080000
+#define BIT20   0x00100000
+#define BIT21   0x00200000
+#define BIT22   0x00400000
+#define BIT23   0x00800000
+#define BIT24   0x01000000
+#define BIT25   0x02000000
+#define BIT26   0x04000000
+#define BIT27   0x08000000
+#define BIT28   0x10000000
+#define BIT29   0x20000000
+#define BIT30   0x40000000
+#define BIT31   0x80000000
+/* Bus / chip data widths; each value equals the width in bytes */
+#define BUS_8BIT    0x01
+#define BUS_16BIT   0x02
+#define BUS_32BIT   0x04
+
+/**************** DEFINES for AMD Basic Command Set **************/
+#define AMD_CMD0                    0xAA        // AMD command prefix 0
+#define AMD_CMD1                    0x55        // AMD command prefix 1
+#define AMD_CMD0_ADDR               0x555       // AMD CMD0 offset
+#define AMD_CMD1_ADDR               0x2AA       // AMD CMD1 offset
+#define AMD_CMD2_ADDR       	    0x555       // AMD CMD2 offset (same value as AMD_CMD0_ADDR)
+#define AMD_ID_CMD                  0x90        // AMD ID command
+#define AMD_MANFID_ADDR             0x00        // Manufacturer ID offset
+#define AMD_DEVID_ADDR0             0x01        // First device ID offset
+#define AMD_DEVID_ADDR1             0x0E        // Offset for 2nd byte of 3-byte ID
+#define AMD_DEVID_ADDR2             0x0F        // Offset for 3rd byte of 3-byte ID
+#define AMD_ID_MULTI                0x7E        // First-byte ID value for 3-byte ID
+#define AMD_RESET                   0xF0        // AMD device reset command
+#define AMD_BLK_ERASE_SETUP_CMD     0x80        // Block erase setup
+#define AMD_BLK_ERASE_CMD	        0x30        // Block erase confirm
+#define AMD_BLK_ERASE_DONE	        0xFFFF      // Block erase check value
+#define AMD_PROG_CMD                0xA0        // AMD simple write command
+#define AMD_WRT_BUF_LOAD_CMD        0x25        // AMD write buffer load command
+#define AMD_WRT_BUF_CONF_CMD        0x29        // AMD write buffer confirm command
+
+/**************** DEFINES for Intel Basic Command Set **************/
+#define INTEL_ID_CMD            0x90        // Intel ID command (same value as AMD_ID_CMD)
+#define INTEL_MANFID_ADDR       0x00        // Manufacturer ID offset
+#define INTEL_DEVID_ADDR        0x01        // Device ID offset
+#define INTEL_RESET             0xFF        // Intel device reset command
+#define INTEL_ERASE_CMD0        0x20        // Intel erase command (CMD0)
+#define INTEL_ERASE_CMD1        0xD0        // Intel erase command (CMD1)
+#define INTEL_WRITE_CMD         0x40        // Intel simple write command
+#define INTEL_WRT_BUF_LOAD_CMD  0xE8        // Intel write buffer load command
+#define INTEL_WRT_BUF_CONF_CMD  0xD0        // Intel write buffer confirm command (same value as INTEL_ERASE_CMD1)
+#define INTEL_LOCK_CMD0         0x60        // Intel lock mode command
+#define INTEL_LOCK_BLOCK_CMD    0x01        // Intel lock command
+#define INTEL_UNLOCK_BLOCK_CMD  0xD0        // Intel unlock command (same value as INTEL_ERASE_CMD1)
+#define INTEL_CLEARSTATUS_CMD   0x50        // Intel clear status command
+
+
+/**************** DEFINES for CFI Commands and Table **************/
+
+// CFI entry and exit commands
+#define CFI_QRY_CMD             0x98U
+#define CFI_EXIT_CMD            0xF0U
+
+// CFI address location for the query command
+#define CFI_QRY_CMD_ADDR        0x55U
+
+// CFI table offsets in bytes
+#define CFI_Q                   0x10
+#define CFI_R                   0x11
+#define CFI_Y                   0x12
+#define CFI_CMDSET              0x13
+#define CFI_CMDSETADDR          0x15
+#define CFI_ALTCMDSET           0x17
+#define CFI_ALTCMDSETADDR       0x19
+#define CFI_MINVCC              0x1B
+#define CFI_MAXVCC              0x1C
+#define CFI_MINVPP              0x1D
+#define CFI_MAXVPP              0x1E
+#define CFI_TYPBYTEPGMTIME      0x1F
+#define CFI_TYPBUFFERPGMTIME    0x20
+#define CFI_TYPBLOCKERASETIME   0x21
+#define CFI_TYPCHIPERASETIME    0x22
+#define CFI_MAXBYTEPGMTIME      0x23
+#define CFI_MAXBUFFERPGMTIME    0x24
+#define CFI_MAXBLOCKERASETIME   0x25
+#define CFI_MAXCHIPERASETIME    0x26
+#define CFI_DEVICESIZE          0x27
+#define CFI_INTERFACE           0x28
+#define CFI_WRITESIZE           0x2A
+#define CFI_NUMBLKREGIONS       0x2C
+#define CFI_BLKREGIONS          0x2D
+#define CFI_BLKREGIONSIZE       0x04 // NOTE(review): presumably the size of one region entry, not an offset - confirm
+
+// Maximum number of block regions supported (sizes the NOR_INFO arrays)
+#define CFI_MAXREGIONS          0x06
+
+/*********************** Enumerated types *************************/
+// Flash manufacturer IDs that NOR_Init() can print by name
+enum flash_manufacturer_id_t {
+	UNKNOWN_ID = 0x00,
+	AMD = 0x01,
+	FUJITSU = 0x04,
+	INTEL = 0x89,
+	MICRON = 0x2C,
+	SAMSUNG = 0xEC,
+	SHARP = 0xB0
+};
+
+typedef enum flash_manufacturer_id_t MANFID;
+
+// CFI command set IDs; NOR_Init() installs handlers for the AMD and Intel ones
+enum FlashCommandSet {
+	UNKNOWN_CMDSET = 0x0000,
+	INTEL_EXT_CMDSET = 0x0001,
+	AMD_BASIC_CMDSET = 0x0002,
+	INTEL_BASIC_CMDSET = 0x0003,
+	AMD_EXT_CMDSET = 0x0004,
+	MITSU_BASIC_CMDSET = 0x0100,
+	MITSU_EXT_CMDSET = 0x0101
+    
+};
+typedef enum FlashCommandSet CMDSET;
+
+/*************************** Structs *********************************/
+// Struct to hold discovered flash parameters
+typedef struct _NOR_MEDIA_STRUCT_ {
+	uint32_t       flashBase;                          // 32-bit address of flash start
+	uint8_t        busWidth;                           // Bus width: BUS_8BIT or BUS_16BIT
+	uint8_t        chipOperatingWidth;                 // The operating width of each chip
+	uint8_t        maxTotalWidth;                      // Maximum extent of width of all chips combined - multiplies offsets in flash_make_addr()
+	uint32_t       flashSize;                          // Size of NOR flash regions in bytes (numberDevices * size of one device)
+	uint32_t       bufferSize;                         // Size of write buffer
+	CMDSET       commandSet;                         // Command set id (see CFI documentation)
+	uint8_t        numberDevices;                      // Number of devices used in parallel
+	uint8_t        numberRegions;                      // Number of contiguous regions of same block size
+	uint32_t       numberBlocks[CFI_MAXREGIONS];    // Number of blocks in a region
+	uint32_t       blockSize[CFI_MAXREGIONS];       // Size of the blocks in a region
+	enum flash_manufacturer_id_t       manfID;                             // Manufacturer's ID
+	uint16_t       devID1;                             // Device ID
+	uint16_t       devID2;                             // Used for AMD 3-byte ID devices
+} NOR_INFO, *PNOR_INFO;
+
+typedef union { // One flash data word, viewed at 8, 16 or 32 bits
+	uint8_t c;
+	uint16_t w;
+	uint32_t l;
+} FLASHData;
+// One flash address, usable as a pointer to 8-, 16- or 32-bit units
+typedef union {
+	volatile uint8_t *cp;
+	volatile uint16_t *wp;
+	volatile uint32_t *lp;
+} FLASHPtr;
+
+// External and global static variables
+extern uint32_t __NORFlash; // Its address is used as the NOR flash base
+// Parameters of the detected NOR flash, shared by all functions in this file
+static volatile NOR_INFO gNorInfo;
+
+// ----------------- Bus Width Agnostic commands -------------------
+volatile uint8_t *flash_make_addr (uint32_t blkAddr, uint32_t offset)
+{	/* Block base plus the offset scaled by gNorInfo.maxTotalWidth */
+	return (volatile uint8_t *) (blkAddr + offset * gNorInfo.maxTotalWidth);
+}
+
+void flash_make_cmd (uint8_t cmd, void *cmdbuf)
+{
+	uint8_t *out = (uint8_t *) cmdbuf;
+	int32_t lane;
+	/* busWidth bytes: cmd when (lane & (chipOperatingWidth - 1)) is 0 */
+	for (lane = gNorInfo.busWidth; lane > 0; lane--, out++)
+		*out = (lane & (gNorInfo.chipOperatingWidth - 1)) ? 0x00 : cmd;
+}
+
+void flash_write_cmd (uint32_t blkAddr, uint32_t offset, uint8_t cmd)
+{
+	volatile FLASHPtr dest;
+	FLASHData pattern;
+	uint8_t width;
+
+	/* Resolve the target address and expand cmd to a bus-wide pattern */
+	dest.cp = flash_make_addr (blkAddr, offset);
+	flash_make_cmd (cmd, &pattern);
+
+	/* Single write of the configured bus width; other widths: no write */
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		*dest.cp = pattern.c;
+	else if (width == BUS_16BIT)
+		*dest.wp = pattern.w;
+}
+
+/*
+ * Store data at address with a single access of the configured bus width
+ * (FLASHData.c for 8 bit, FLASHData.w for 16 bit; otherwise no write).
+ */
+void flash_write_data(uint32_t address, uint32_t data)
+{
+	volatile FLASHPtr dest;
+	FLASHData value;
+	uint8_t width = gNorInfo.busWidth;
+
+	value.l = data;
+	dest.cp = (volatile uint8_t *) address;
+
+	if (width == BUS_8BIT)
+		*dest.cp = value.c;
+	else if (width == BUS_16BIT)
+		*dest.wp = value.w;
+}
+
+/*
+ * Copy numBytes from data to the flash address held in *address, one
+ * bus-width unit per access. On return *address holds the address of the
+ * last unit written (used for AMD verification). With an unsupported bus
+ * width nothing is copied and *address is left untouched.
+ */
+void flash_write_databuffer(uint32_t* address, void* data, uint32_t numBytes)
+{
+	volatile FLASHPtr pAddr, pData;
+	volatile uint8_t* endAddress;
+	uint8_t width = gNorInfo.busWidth;
+
+	/* Unsupported width: neither pointer could advance, so return
+	 * instead of spinning forever in the copy loop. */
+	if (width != BUS_8BIT && width != BUS_16BIT)
+		return;
+
+	pData.cp = (volatile uint8_t*) data;
+	pAddr.cp = (volatile uint8_t*) *address;
+	endAddress = (volatile uint8_t*)((*address)+numBytes);
+
+	while (pAddr.cp < endAddress)
+	{
+		if (width == BUS_8BIT)
+			*pAddr.cp++ = *pData.cp++;
+		else
+			*pAddr.wp++ = *pData.wp++;
+	}
+
+	/* BUS_8BIT/BUS_16BIT equal the unit size in bytes (1 or 2), so this
+	 * is the address of the last unit written. */
+	*address = (uint32_t)(endAddress - width);
+}
+
+/* Compare flash at address with data; E_FAIL on mismatch or bad bus width */
+uint32_t flash_verify_databuffer(uint32_t address, void* data, uint32_t numBytes)
+{
+	volatile FLASHPtr pAddr, pData;
+	volatile uint8_t* endAddress;
+	uint8_t width = gNorInfo.busWidth;
+
+	pData.cp = (volatile uint8_t*) data;
+	pAddr.cp = (volatile uint8_t*) address;
+	endAddress = (volatile uint8_t*)(address+numBytes);
+	while (pAddr.cp < endAddress)
+	{
+		if (width == BUS_8BIT) {
+			if ( (*pAddr.cp++) != (*pData.cp++) )
+				return E_FAIL;
+		} else if (width == BUS_16BIT) {
+			if ( (*pAddr.wp++) != (*pData.wp++) )
+				return E_FAIL;
+		} else {
+			return E_FAIL; /* would otherwise loop forever */
+		}
+	}
+	return E_PASS;
+}
+
+/*
+ * Read one bus-width unit at flash_make_addr(address, offset) into a zeroed
+ * FLASHData and return its 32-bit view (0 if the width is not 8/16 bit).
+ */
+uint32_t flash_read_data(uint32_t address, uint32_t offset)
+{
+	volatile FLASHPtr src;
+	FLASHData value;
+	uint8_t width;
+
+	value.l = 0x00000000;
+	src.cp = flash_make_addr(address, offset);
+
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		value.c = *src.cp;
+	else if (width == BUS_16BIT)
+		value.w = *src.wp;
+	return value.l;
+}
+
+/* Read up to sizeof(FLASHData) CFI bytes into a zeroed FLASHData */
+FLASHData flash_read_CFI_bytes (uint32_t blkAddr, uint32_t offset, uint8_t numBytes)
+{
+	uint32_t i;
+	FLASHData readword;
+	uint8_t* pReadword = &readword.c;
+	readword.l = 0; /* bytes not read stay defined */
+	if (numBytes > sizeof(readword)) /* never write past the union */
+		numBytes = sizeof(readword);
+	for (i = 0; i < numBytes; i++)
+		*pReadword++ = *(flash_make_addr (blkAddr, offset+i));
+	return readword;
+}
+
+/*
+ * Compare val with the unit read by flash_read_data(blkAddr, offset), using
+ * the 8-bit or 16-bit member chosen by the bus width; otherwise FALSE.
+ */
+Bool flash_data_isequal (uint32_t blkAddr, uint32_t offset, uint32_t val)
+{
+	FLASHData expected, actual;
+	uint8_t width;
+
+	expected.l = val;
+	actual.l = flash_read_data(blkAddr, offset);
+
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		return (expected.c == actual.c);
+	if (width == BUS_16BIT)
+		return (expected.w == actual.w);
+	return FALSE;
+}
+
+/*
+ * Expand val with flash_make_cmd() and compare it with the unit at
+ * flash_make_addr(blkAddr, offset). Widths other than 8/16 bit perform no
+ * read and yield TRUE.
+ */
+Bool flash_CFI_isequal (uint32_t blkAddr, uint32_t offset, uint8_t val)
+{
+	volatile FLASHPtr src;
+	FLASHData pattern;
+	uint8_t width;
+
+	src.cp = flash_make_addr (blkAddr, offset);
+	flash_make_cmd (val, &pattern);
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		return (pattern.c == *src.cp);
+	if (width == BUS_16BIT)
+		return (pattern.w == *src.wp);
+	return TRUE;
+}
+
+/*
+ * Return whether every bit of the flash_make_cmd()-expanded mask is set in
+ * the unit at flash_make_addr(blkAddr, offset). Widths other than 8/16 bit
+ * perform no read and yield TRUE.
+ */
+Bool flash_issetall (uint32_t blkAddr, uint32_t offset, uint8_t mask)
+{
+	volatile FLASHPtr src;
+	FLASHData bits;
+	uint8_t width;
+
+	bits.l = 0x00000000;
+	src.cp = flash_make_addr (blkAddr, offset);
+	flash_make_cmd (mask, &bits);
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		return ((bits.c & *src.cp) == bits.c);
+	if (width == BUS_16BIT)
+		return ((bits.w & *src.wp) == bits.w);
+	return TRUE;
+}
+
+/*
+ * Return TRUE when at least one bit of the flash_make_cmd()-expanded mask is
+ * set in the unit at flash_make_addr(blkAddr, offset). The masked value is
+ * compared with 0 so that a narrow Bool type cannot truncate high bits away.
+ */
+Bool flash_issetsome (uint32_t blkAddr, uint32_t offset, uint8_t mask)
+{
+	volatile FLASHPtr addr;
+	FLASHData maskword;
+	uint8_t width;
+
+	addr.cp = flash_make_addr (blkAddr, offset);
+	flash_make_cmd ( mask, &maskword);
+	width = gNorInfo.busWidth;
+	if (width == BUS_8BIT)
+		return ((maskword.c & *addr.cp) != 0);
+	if (width == BUS_16BIT)
+		return ((maskword.w & *addr.wp) != 0);
+	return TRUE; /* other widths: no read, TRUE as before */
+}
+
+// Initialize the AEMIF for NOR, identify the flash via CFI, install the command-set handlers; returns E_PASS or E_FAIL
+uint32_t NOR_Init()
+{
+	uint8_t width = ( ( (SYSTEM->BOOTCFG) >> 5) & 0x1 );
+	// width is bit 5 of BOOTCFG: 0 selects BUS_8BIT, 1 selects BUS_16BIT below
+	// Select ASYNC EMIF Address Lines
+	SYSTEM->PINMUX[0] = 0xC1F;
+
+	// Program Asynchronous Wait Cycles Configuration Control Register
+#warning "To check: AEMIF->AWCCR |= 0x0;"
+	AEMIF->AWCCR |= 0x0; // OR with 0: read-modify-write that leaves the bits unchanged
+
+	// Program the four asynchronous bank configuration registers (A1CR-A4CR)
+	AEMIF->A1CR = 0x3FFFFFFC | width;
+	AEMIF->A2CR = 0x3FFFFFFC | width;
+	AEMIF->A3CR = 0x3FFFFFFC | width;
+	AEMIF->A4CR = 0x3FFFFFFC | width;
+    
+	/*AEMIF->A1CR = 0
+	  | ( 0 << 31 ) // selectStrobe      = 0;
+	  | ( 0 << 30 ) // extWait           = 0;
+	  | ( 0 << 26 ) // writeSetup        = 0;    //   0 ns
+	  | ( 3 << 20 ) // writeStrobe       = 3;    //  35 ns
+	  | ( 0 << 17 ) // writeHold         = 0;    //   0 ns
+	  | ( 3 << 13 ) // readSetup         = 3;    //  30 ns
+	  | ( 10<< 7 )  // readStrobe        = 10;   // 120 ns
+	  | ( 0 << 4 )  // readHold          = 0;    //   0 ns
+	  | ( 3 << 2 )  // turnAround        = 3;    //  ?? ns ( MAX TIMEOUT )
+	  | ( 1 << 0 )  // asyncSize         = 1;    // 16-bit bus
+	  ;*/
+                
+	// Init the FlashInfo structure: base is the address of __NORFlash
+	gNorInfo.flashBase = (uint32_t) &(__NORFlash);
+    
+	// Set width to 8 or 16
+	gNorInfo.busWidth = (width)?BUS_16BIT:BUS_8BIT;
+    
+	// Perform CFI Query
+	if (QueryCFI(gNorInfo.flashBase) == E_PASS)
+	{
+		// Below is specifically needed to check for AMD flash on DVEVM (rev. D or earlier)
+		// since its top address line is not connected: a second successful query at half the size halves the size
+		if (gNorInfo.numberRegions == 1)
+		{
+			if ( QueryCFI( gNorInfo.flashBase+(gNorInfo.flashSize>>1) ) == E_PASS )     
+			{
+				gNorInfo.flashSize >>= 1;
+				gNorInfo.numberBlocks[0] >>= 1;
+			}
+		}
+	}
+	else
+	{
+		log_info("CFI query failed.");
+		return E_FAIL;
+	}
+    
+	// Setup function pointers according to the detected command set
+    
+	log_info("NOR Initialization:");
+    
+	uart_send_str("  Command Set: ");
+	switch (gNorInfo.commandSet)
+	{
+        case AMD_BASIC_CMDSET:
+        case AMD_EXT_CMDSET:
+		Flash_Erase          = &AMD_Erase;
+		Flash_BufferWrite    = &AMD_BufferWrite;
+		Flash_Write          = &AMD_Write;
+		Flash_ID             = &AMD_ID;
+		log_info("AMD");
+		break;
+        case INTEL_BASIC_CMDSET:
+        case INTEL_EXT_CMDSET:
+		Flash_Erase          = &Intel_Erase;
+		Flash_BufferWrite    = &Intel_BufferWrite;
+		Flash_Write          = &Intel_Write;
+		Flash_ID             = &Intel_ID;
+		log_info("Intel");
+		break;
+        default:
+		Flash_Write          = &Unsupported_Write;
+		Flash_BufferWrite    = &Unsupported_BufferWrite;
+		Flash_Erase          = &Unsupported_Erase;
+		Flash_ID             = &Unsupported_ID;
+		log_info("Unknown");
+		break;
+	}
+	// Run the ID routine just selected; its failure aborts the initialization
+	if ( (*Flash_ID)(gNorInfo.flashBase) != E_PASS)
+	{
+		log_info("NOR ID failed.");
+		return E_FAIL;
+	}
+        
+	uart_send_str("  Manufacturer: ");
+	switch(gNorInfo.manfID)
+	{
+        case AMD:
+		uart_send_str("AMD");
+		break;
+        case FUJITSU:
+		uart_send_str("FUJITSU");
+		break;
+        case INTEL:
+		uart_send_str("INTEL");
+		break;
+        case MICRON:
+		uart_send_str("MICRON");
+		break;
+        case SAMSUNG:
+		uart_send_str("SAMSUNG");
+		break;
+        case SHARP:
+		uart_send_str("SHARP");
+		break;
+        default:
+		uart_send_str("Unknown");
+		break;
+	}
+	uart_send_lf();
+	uart_send_str("  Size (in bytes): ");
+	uart_send_hexnum(gNorInfo.flashSize, 8);
+	uart_send_lf();
+    
+	return E_PASS;    
+}
+
+// Probe baseAddress for a CFI table; on success fill gNorInfo from it and return E_PASS.
+uint32_t QueryCFI( uint32_t baseAddress )
+{
+	int32_t i;
+	uint32_t blkVal;
+	// Returns E_FAIL when no width combination answers with "QRY".
+	// Six possible NOR Flash Configurations of DM644x
+	//  1) Bus in x8 mode, x8 only device
+	//  2) Bus in x8 mode, single x8/x16 flash operating in x8 mode
+	//  3) Bus in x16 mode, single x8/x16 or x16-only flash operating in x16 mode
+	//  4) Bus in x16 mode, two x8 flash operating in parallel.
+	//  5) Bus in x16 mode, two x8/x16 flash, each in x8 mode, operating in parallel
+	//  6) Bus in x16 mode, single x16/x32 flash operating in x16 mode
+	// Walk every chip width / total width pair until one of them responds.
+	for (gNorInfo.chipOperatingWidth = BUS_8BIT; gNorInfo.chipOperatingWidth <= gNorInfo.busWidth;  gNorInfo.chipOperatingWidth <<= 1)
+	{
+		for (gNorInfo.maxTotalWidth = gNorInfo.busWidth; gNorInfo.maxTotalWidth <= (gNorInfo.busWidth*2); gNorInfo.maxTotalWidth <<= 1)
+		{
+			// Number of chips needed to fill the bus at this chip width
+			gNorInfo.numberDevices = 0;
+			while ( gNorInfo.numberDevices * gNorInfo.chipOperatingWidth < gNorInfo.busWidth)
+				gNorInfo.numberDevices++;
+
+			// Leave any previous mode, then enter the CFI Query mode
+			flash_write_cmd (baseAddress, 0, CFI_EXIT_CMD);
+			flash_write_cmd (baseAddress, CFI_QRY_CMD_ADDR, CFI_QRY_CMD);
+
+			// Check for Query QRY values
+			if ( flash_CFI_isequal ( baseAddress, CFI_Q, 'Q') &&
+			     flash_CFI_isequal ( baseAddress, CFI_R, 'R') &&
+			     flash_CFI_isequal ( baseAddress, CFI_Y, 'Y') )
+			{
+				// '*' binds tighter than '<<': the (1 << n) sizes are parenthesized before scaling by the chip count
+				gNorInfo.commandSet = (CMDSET) (flash_read_CFI_bytes(baseAddress,CFI_CMDSET,2).w);
+				gNorInfo.flashSize = (0x1 << flash_read_CFI_bytes(baseAddress,CFI_DEVICESIZE,1).c) * gNorInfo.numberDevices;
+				gNorInfo.numberRegions = flash_read_CFI_bytes(baseAddress,CFI_NUMBLKREGIONS,1).c;
+				gNorInfo.bufferSize = (0x1 << flash_read_CFI_bytes(baseAddress,CFI_WRITESIZE,2).w) * gNorInfo.numberDevices;
+				// Get info on sector sizes in each erase region of device
+				for (i = 0;i < gNorInfo.numberRegions; i++)
+				{
+					blkVal = flash_read_CFI_bytes(baseAddress,(CFI_BLKREGIONS+i*CFI_BLKREGIONSIZE),4).l;
+					gNorInfo.numberBlocks[i] = (blkVal&0x0000FFFF) + 1;
+					gNorInfo.blockSize[i]    = ((blkVal&0xFFFF0000) ? ( ((blkVal>>16)&0xFFFF) * 256) : 128) * gNorInfo.numberDevices;
+				}
+
+				// Exit CFI mode
+				flash_write_cmd (baseAddress, 0, CFI_EXIT_CMD);
+
+				return E_PASS;
+			}
+		}
+	}
+	// No combination responded: send the exit command once more before giving up
+	flash_write_cmd (baseAddress, 0, CFI_EXIT_CMD);
+	return E_FAIL;
+}
+
+
+// -------------------------------------------------------------------------
+// Manufacturer Specific Commands
+// -------------------------------------------------------------------------
+
+// ------------------------  Default Empty  ---------------------------
+uint32_t Unsupported_Write( uint32_t address, volatile uint32_t data)
+{
+	return E_FAIL;	// placeholder for an unrecognized command set: never writes, always fails
+}
+uint32_t Unsupported_BufferWrite(uint32_t address, volatile uint8_t data[], uint32_t length )
+{
+	return E_FAIL;	// placeholder for an unrecognized command set: nothing is written
+}
+uint32_t Unsupported_Erase(uint32_t address)
+{
+	return E_FAIL;	// placeholder for an unrecognized command set: nothing is erased
+}
+
+uint32_t Unsupported_ID(uint32_t address)
+{
+	return E_FAIL;	// placeholder for an unrecognized command set: no ID is read into gNorInfo
+}
+
+
+// -------------------- Begin of Intel specific commands -----------------------
+
+// Read the manufacturer and device IDs of an Intel-command-set flash into gNorInfo
+uint32_t Intel_ID( uint32_t baseAddress )
+{
+	// Start from read array mode
+	Intel_Soft_Reset_Flash();
+
+	// Write ID command
+	flash_write_cmd(baseAddress, 0, INTEL_ID_CMD);
+
+	// Read Manufacturer's ID
+	gNorInfo.manfID = (enum flash_manufacturer_id_t) flash_read_data(baseAddress, INTEL_MANFID_ADDR);
+
+	// Read Device ID. It is not a manufacturer code, so it is narrowed straight to 16 bits
+	gNorInfo.devID1 = (uint16_t) flash_read_data(baseAddress, INTEL_DEVID_ADDR);
+	gNorInfo.devID2 = 0x0000;	// no second ID word is read for this command set
+
+	// Leave ID mode: back to read array mode
+	Intel_Soft_Reset_Flash();
+
+	return E_PASS;	// always E_PASS; the IDs are not validated here
+}
+
+// Send INTEL_RESET so the chip goes back to read array mode
+void Intel_Soft_Reset_Flash()
+{
+	// Always written at offset 0 of gNorInfo.flashBase, whatever block was last used
+	flash_write_cmd(gNorInfo.flashBase,0,INTEL_RESET);
+}
+
+// Send the clear-status command to the flash
+void Intel_Clear_Status()
+{
+	// Written at offset 0 of gNorInfo.flashBase
+	flash_write_cmd(gNorInfo.flashBase,0,INTEL_CLEARSTATUS_CMD);
+}
+
+// Remove write protection from the block at blkAddr
+uint32_t Intel_Clear_Lock(volatile uint32_t blkAddr)
+{
+	// Two commands, both written to the block itself:
+	// the lock setup command first...
+	flash_write_cmd(blkAddr,0,INTEL_LOCK_CMD0);
+	// ...then the unlock command
+	flash_write_cmd(blkAddr,0,INTEL_UNLOCK_BLOCK_CMD);
+
+	// Result comes from the status check, which also puts the chip back in read array mode
+	return Intel_Lock_Status_Check();
+}
+
+// Write-protect the block at blkAddr
+uint32_t Intel_Set_Lock(volatile uint32_t blkAddr)
+{
+	// Same lock setup command as Intel_Clear_Lock(), written to the block itself...
+	flash_write_cmd(blkAddr,0,INTEL_LOCK_CMD0);
+	// ...followed by the lock command instead of the unlock command
+	flash_write_cmd(blkAddr,0,INTEL_LOCK_BLOCK_CMD);
+
+	// Result comes from the status check, which also puts the chip back in read array mode
+	return Intel_Lock_Status_Check();
+}
+
+void Intel_Wait_For_Status_Complete()
+{
+	while ( !flash_issetall(gNorInfo.flashBase, 0, BIT7) );	// busy-wait with no timeout until BIT7 reads back set
+}
+
+// Wait for a lock/unlock command to finish and report whether it succeeded.
+// Returns E_FAIL if BIT5 or BIT3 is set in the status word, E_PASS otherwise.
+// Intel_Wait_For_Status_Complete() has no timeout, so neither does this.
+uint32_t Intel_Lock_Status_Check()
+{
+	uint32_t lockResult;
+
+	// Sequence performed here:
+	//  1) wait for the pending command to complete,
+	//  2) sample the error bits of the status word,
+	//  3) clear the status register,
+	//  4) return the chip to read array mode.
+	// The result only reflects step 2; steps 3 and 4 always run.
+
+	// 1) Spin until the chip signals completion
+	Intel_Wait_For_Status_Complete();
+
+	// 2) Either of BIT5/BIT3 set is reported as a failure.
+	// Meaning of the bits, per the diagnostics that used to be printed here:
+	//   BIT5 with BIT4 - command sequence error
+	//   BIT5 alone     - clear lock error
+	//   BIT3           - voltage range error
+	if (flash_issetsome(gNorInfo.flashBase, 0, (BIT5 | BIT3)))
+		lockResult = E_FAIL;
+	else
+		lockResult = E_PASS;
+
+	// 3) Clear status
+	Intel_Clear_Status();
+
+	// 4) Put chip back into read array mode
+	Intel_Soft_Reset_Flash();
+
+	return lockResult;
+}
+
+
+// Unlock and erase the flash block at blkAddr
+uint32_t Intel_Erase(volatile uint32_t blkAddr)
+{
+	uint32_t eraseStatus;
+
+	// Remove the block lock first; its result is OR-ed into the status
+	eraseStatus = E_PASS | Intel_Clear_Lock(blkAddr);
+
+	// Two-command erase sequence, both written to the block itself
+	flash_write_cmd(blkAddr,0,INTEL_ERASE_CMD0);
+	flash_write_cmd(blkAddr,0,INTEL_ERASE_CMD1);
+
+	// Spin until the chip reports completion
+	Intel_Wait_For_Status_Complete();
+
+	// BIT5 set in the status word marks a failed erase
+	if ( flash_issetsome(gNorInfo.flashBase, 0, BIT5) )
+		eraseStatus = E_FAIL;
+
+	// Back to Read Array mode before returning
+	Intel_Soft_Reset_Flash();
+
+	return eraseStatus;
+}
+
+// Program a single data item at address
+// Returns E_PASS on success, E_FAIL if the status word flags an error
+uint32_t Intel_Write( uint32_t address, volatile uint32_t data )
+{
+	uint32_t programStatus;
+
+	// Write command followed by the data, both to the target address
+	flash_write_cmd(address,0,INTEL_WRITE_CMD);
+	flash_write_data(address, data);
+
+	// Spin until the chip reports completion
+	Intel_Wait_For_Status_Complete();
+
+	// BIT4 or BIT3 set in the status word marks a failed program
+	if ( flash_issetsome(gNorInfo.flashBase, 0, (BIT4|BIT3)) )
+		programStatus = E_FAIL;
+	else
+		programStatus = E_PASS;
+
+	// The block is not re-locked afterwards
+	// (no Intel_Set_Lock() call is made here).
+
+	// Back to Read Array mode before returning
+	Intel_Soft_Reset_Flash();
+
+	return programStatus;
+}
+
+// Program numBytes bytes from data[] at address using the chip's write buffer
+uint32_t Intel_BufferWrite(uint32_t address, volatile uint8_t data[], uint32_t numBytes )
+{
+	uint32_t startAddress = address;
+	uint32_t retval = E_PASS;
+	uint32_t timeoutCnt = 0, shift;
+	// Returns E_PASS, E_TIMEOUT (buffer never became available) or E_FAIL (program error)
+	// Re-issue the buffer load command until BIT7 is set, at most 0x10000 times
+	do {
+		flash_write_cmd(address,0,INTEL_WRT_BUF_LOAD_CMD);
+		timeoutCnt++;
+	}while( (!flash_issetall(gNorInfo.flashBase, 0, BIT7)) && (timeoutCnt < 0x00010000) );
+	// NOTE(review): success on exactly the last attempt is also reported as a timeout
+	if (timeoutCnt >= 0x10000)
+	{
+		// Nothing was written; the chip is not reset on this path
+		retval = E_TIMEOUT;
+	}
+	else
+	{
+		// shift = log2(busWidth), used to turn a byte count into a bus-word count
+		shift = 0;
+		while ((gNorInfo.busWidth >> shift) > 1)
+			shift++;
+
+		// Write Length (either numBytes or numBytes/2), encoded as count - 1
+		flash_write_cmd(startAddress, 0, (numBytes >> shift) - 1);
+
+		// flash_write_databuffer() receives &address and may advance it,
+		// which is why startAddress was saved above
+
+		// Write buffer data
+		flash_write_databuffer(&address,(void*)data,numBytes);
+
+		// Send write buffer confirm command
+		flash_write_cmd(startAddress,0,INTEL_WRT_BUF_CONF_CMD);
+
+		// Spin (no timeout) until the chip reports completion
+		Intel_Wait_For_Status_Complete();
+
+		// Verify program was successful: BIT4 set means failure
+		// NOTE(review): the status register is not cleared here - confirm whether BIT4 is sticky
+		if ( flash_issetsome(gNorInfo.flashBase, 0, BIT4) )
+		{
+#ifdef NOR_DEBUG
+			log_info("Write Buffer Op Failed.");
+#endif
+			retval = E_FAIL;
+		}
+
+		// Put back into Read Array mode.
+		Intel_Soft_Reset_Flash();
+	}
+
+	return retval;
+}
+// -------------------- End of Intel specific commands ----------------------
+
+
+// -------------------- Begin of AMD specific commands -----------------------
+// Read the manufacturer and device IDs of an AMD-command-set flash into gNorInfo
+uint32_t AMD_ID( uint32_t baseAddress )
+{
+	// Start from read array mode
+	AMD_Soft_Reset_Flash();
+
+	// Write ID commands: the two prefix commands, then the ID command itself
+	AMD_Prefix_Commands();
+	flash_write_cmd(baseAddress, AMD_CMD2_ADDR, AMD_ID_CMD);
+
+	// Read manufacturer's ID
+	gNorInfo.manfID = (enum flash_manufacturer_id_t) flash_read_data(baseAddress, AMD_MANFID_ADDR);
+
+	// Read device ID (kept as 16 bits)
+	gNorInfo.devID1 = (uint16_t) flash_read_data(baseAddress, AMD_DEVID_ADDR0);
+
+	// A low byte equal to AMD_ID_MULTI means a second ID word must be read; otherwise devID2 is 0
+	if ( (gNorInfo.devID1 & 0xFF ) == AMD_ID_MULTI )
+		gNorInfo.devID2 = flash_read_CFI_bytes(baseAddress, AMD_DEVID_ADDR1, 2).w;
+	else
+		gNorInfo.devID2 = 0x0000;
+
+	// Exit back to read array mode
+	AMD_Soft_Reset_Flash();
+	// Always E_PASS: the IDs are stored, not validated
+	return E_PASS;
+}
+
+
+
+void AMD_Soft_Reset_Flash()
+{
+	// Send AMD_RESET (at AMD_CMD2_ADDR from the flash base) to go back to Read Array Mode
+	flash_write_cmd(gNorInfo.flashBase,AMD_CMD2_ADDR,AMD_RESET);
+}
+
+// Issue the two prefix writes (AMD_CMD0, AMD_CMD1) that the callers send before each AMD command
+void AMD_Prefix_Commands()
+{
+	flash_write_cmd(gNorInfo.flashBase, AMD_CMD0_ADDR, AMD_CMD0);	// first prefix write
+	flash_write_cmd(gNorInfo.flashBase, AMD_CMD1_ADDR, AMD_CMD1);	// second prefix write
+}
+
+// Erase the flash block at blkAddr; returns E_PASS or E_FAIL
+uint32_t AMD_Erase(uint32_t blkAddr)
+{
+	uint32_t retval = E_PASS;
+
+	// Erase setup is sent relative to the flash base, the erase command to the block itself
+	AMD_Prefix_Commands();
+	flash_write_cmd(gNorInfo.flashBase, AMD_CMD2_ADDR, AMD_BLK_ERASE_SETUP_CMD);
+	AMD_Prefix_Commands();
+	flash_write_cmd(blkAddr, AMD_CMD2_ADDR, AMD_BLK_ERASE_CMD);
+
+	// Busy-wait (no timeout) until BIT7 reads back set at the block address
+	while ( !flash_issetall(blkAddr, 0, BIT7) );
+
+	// The first location of the block must now read AMD_BLK_ERASE_DONE
+	if ( !flash_data_isequal(blkAddr, 0, AMD_BLK_ERASE_DONE) )
+		retval = E_FAIL;
+
+	/* Flash Mode: Read Array */
+	AMD_Soft_Reset_Flash();
+
+	return retval;
+}
+
+// Program a single data item at address, then read it back; returns E_PASS or E_FAIL
+uint32_t
+AMD_Write(uint32_t address, volatile uint32_t data)
+{
+	uint32_t retval = E_PASS;
+
+	// Prefix commands + program command, then the data itself at the target address
+	AMD_Prefix_Commands();
+	flash_write_cmd(gNorInfo.flashBase, AMD_CMD2_ADDR, AMD_PROG_CMD);
+	flash_write_data(address, data);
+
+	// Poll until bits 7/15 read back equal to those of data, or BIT5 flags a timeout
+	while(TRUE) {
+		if ((flash_read_data(address, 0 ) & (BIT7 | BIT15) ) == (data & (BIT7 | BIT15))) {
+			break;
+		} else {
+			if (flash_issetall(address, 0, BIT5)) {
+				if ((flash_read_data(address, 0 ) & (BIT7 | BIT15) ) != (data & (BIT7 | BIT15))) {
+					log_info("Timeout occurred.");
+					retval = E_FAIL;
+				}
+				break;	// the second read above lets a just-completed write still pass
+			}
+		}
+	}
+
+	// Return Read Mode
+	AMD_Soft_Reset_Flash();
+
+	// Verify the data: only done when polling did not already fail
+	if ((retval == E_PASS) && (flash_read_data(address, 0) != data))
+		retval = E_FAIL;
+
+	return retval;
+}
+
+// Program numBytes bytes from data[] at address using the chip's write buffer, then verify
+uint32_t
+AMD_BufferWrite(uint32_t address, volatile uint8_t data[], uint32_t numBytes)
+{
+	uint32_t startAddress = address;
+	uint32_t blkAddress, blkSize;
+	uint32_t data_temp;
+	uint32_t retval = E_PASS;
+	uint32_t shift;
+
+	// Get block base address and size; without them blkAddress below would be uninitialized
+	if (DiscoverBlockInfo(address, &blkSize, &blkAddress) != E_PASS)
+		return E_FAIL;
+	// Write the Write Buffer Load command
+	AMD_Prefix_Commands();
+	flash_write_cmd(blkAddress, 0, AMD_WRT_BUF_LOAD_CMD);
+
+	// shift = log2(busWidth), used to turn a byte count into a bus-word count
+	shift = 0;
+	while ((gNorInfo.busWidth >> shift) > 1)
+		shift++;
+
+	// Write Length (either numBytes or numBytes/2), encoded as count - 1
+	flash_write_cmd(blkAddress, 0, (numBytes >> shift) - 1);
+
+	// Write Data; address is passed by pointer and may be advanced by the helper
+	flash_write_databuffer(&address,(void*)data, numBytes);
+
+	// Program Buffer to Flash Confirm Write
+	flash_write_cmd(blkAddress, 0, AMD_WRT_BUF_CONF_CMD);
+
+	// NOTE(review): presumably address now points at the last item written - confirm in flash_write_databuffer()
+	data_temp = flash_read_data((uint32_t) (data + (address - startAddress)), 0);
+
+	while (true) {
+		// Done once bits 7/15 read back equal to those of the reference item
+		if ((flash_read_data(address, 0 ) & (BIT7 | BIT15)) == (data_temp & (BIT7 | BIT15))) {
+			break;
+		} else {
+			// Timeout has occurred
+			if(flash_issetall(address, 0, BIT5)) {
+				if ((flash_read_data(address, 0 ) & (BIT7 | BIT15)) != (data_temp & (BIT7 | BIT15))) {
+					log_info("Timeout occurred.");
+					retval = E_FAIL;
+				}
+				break;
+			}
+			// Abort has occurred
+			if (flash_issetall(address, 0, BIT1)) {
+				if ((flash_read_data(address, 0 ) & (BIT7 | BIT15)) != (data_temp & (BIT7 | BIT15))) {
+					log_info("Abort occurred.");
+					retval = E_FAIL;
+					AMD_Write_Buf_Abort_Reset_Flash ();
+				}
+				break;
+			}
+		}
+	}
+
+	// Put chip back into read array mode, then compare the flash with the source buffer
+	AMD_Soft_Reset_Flash();
+	if (retval == E_PASS)
+		retval = flash_verify_databuffer(startAddress,(void*)data, numBytes);
+
+	return retval;
+}
+
+// Recover from an aborted buffered write: prefix commands followed by a soft reset
+void
+AMD_Write_Buf_Abort_Reset_Flash(void)
+{
+	// Unlike a plain AMD_Soft_Reset_Flash(), the reset is preceded by the two prefix writes
+	AMD_Prefix_Commands();
+	AMD_Soft_Reset_Flash();
+}
+
+// Find the erase block containing address: its size goes to *blockSize, its base to *blockAddr.
+uint32_t
+DiscoverBlockInfo(uint32_t address,uint32_t* blockSize, uint32_t* blockAddr)
+{
+	int32_t i;
+	uint32_t currRegionAddr, nextRegionAddr;
+	// Returns E_PASS when a block was found; E_FAIL (outputs untouched) otherwise.
+	currRegionAddr = (uint32_t) gNorInfo.flashBase;
+	if ((address < currRegionAddr) || (address >= (currRegionAddr+gNorInfo.flashSize))) {
+		return E_FAIL;
+	}
+	// Walk the erase regions in address order until one contains address
+	for (i = 0; i < (gNorInfo.numberRegions); i++) {
+		nextRegionAddr = currRegionAddr + (gNorInfo.blockSize[i] * gNorInfo.numberBlocks[i]);
+		if ( (currRegionAddr <= address) && (nextRegionAddr > address) ) {
+			*blockSize = gNorInfo.blockSize[i];
+			*blockAddr = address & (~((*blockSize) - 1));	// mask only rounds down correctly for power-of-two sizes
+			return E_PASS;
+		}
+		currRegionAddr = nextRegionAddr;
+	}
+	// address is inside flashSize but not covered by any known region
+	return E_FAIL;
+}
+
+/* Erase the whole device: from gNorInfo.flashBase over gNorInfo.flashSize bytes. */
+uint32_t NOR_GlobalErase(void)
+{
+	return NOR_Erase(gNorInfo.flashBase, gNorInfo.flashSize);
+}
+
+
+uint32_t
+nor_get_flashbase(void)
+{
+	return gNorInfo.flashBase;	/* accessor for the flash base address stored in gNorInfo */
+}
+
+uint32_t
+NOR_Erase(volatile uint32_t start_address, volatile uint32_t size)
+{
+	volatile uint32_t addr  = start_address;
+	volatile uint32_t range = start_address + size;
+	uint32_t blockSize, blockAddr;
+	// Erases every block overlapping [start_address, start_address + size); returns E_PASS or E_FAIL
+	log_info("Erasing the NOR Flash");
+
+	while (addr < range) {
+		if (DiscoverBlockInfo(addr, &blockSize, &blockAddr) != E_PASS) {
+			uart_send_str("Address out of range");
+			return E_FAIL;
+		}
+
+		// Erase the whole block containing addr, through the command-set specific routine
+		if ( (*Flash_Erase)(blockAddr) != E_PASS) {
+			uart_send_str("Erase failure at block address ");
+			uart_send_hexnum(blockAddr, 8);
+			uart_send_lf();
+			return E_FAIL;
+		}
+		addr = blockAddr + blockSize;	// continue at the start of the next block
+
+		// Show status messages
+		uart_send_str("Erased through ");
+		uart_send_hexnum(addr, 8);
+		uart_send_lf();
+	}
+
+	log_info("Erase Completed");
+
+	return(E_PASS);
+}
+
+// Program numBytes bytes read from readAddress into NOR flash starting at writeAddress
+uint32_t
+NOR_WriteBytes(uint32_t writeAddress, uint32_t numBytes, uint32_t readAddress)
+{
+	uint32_t blockSize, blockAddr;
+	int i;
+	uint32_t retval = E_PASS;
+	// Returns E_PASS once everything is written, E_FAIL as soon as a write cannot be completed
+	log_info("Writing the NOR Flash");
+
+	// Make numBytes even if needed
+	if (numBytes & 0x00000001)
+		numBytes++;
+	// blockSize is needed below to pace the progress messages
+	if (DiscoverBlockInfo(writeAddress, &blockSize, &blockAddr) != E_PASS) {
+		uart_send_str("Address out of range");
+		return E_FAIL;
+	}
+
+	while (numBytes > 0) {
+		// Single writes when less than a buffer remains or writeAddress is not buffer-aligned
+		if ( (numBytes < gNorInfo.bufferSize) || (writeAddress & (gNorInfo.bufferSize-1) )) {
+			if ((*Flash_Write)(writeAddress, flash_read_data(readAddress,0) ) != E_PASS) {
+				log_info("\r\nNormal Write Failed.");
+				retval = E_FAIL;
+			} else {
+				numBytes     -= gNorInfo.busWidth;
+				writeAddress += gNorInfo.busWidth;
+				readAddress  += gNorInfo.busWidth;
+			}
+		} else {
+			// Try to use buffered writes
+			if ((*Flash_BufferWrite)(writeAddress, (volatile uint8_t *)readAddress, gNorInfo.bufferSize) == E_PASS) {
+				numBytes -= gNorInfo.bufferSize;
+				writeAddress += gNorInfo.bufferSize;
+				readAddress  += gNorInfo.bufferSize;
+			}
+			else {
+				// Try normal writes as a backup (bufferSize/2 of them, busWidth bytes each)
+				for (i = 0; i<(gNorInfo.bufferSize>>1); i++) {
+					if ((*Flash_Write)(writeAddress, flash_read_data(readAddress,0) ) != E_PASS) {
+						log_info("\r\nNormal write also failed");
+						retval = E_FAIL;
+						break;
+					} else {
+						numBytes     -= gNorInfo.busWidth;
+						writeAddress += gNorInfo.busWidth;
+						readAddress  += gNorInfo.busWidth;
+					}
+				}
+			}
+		}
+
+		// Report progress every blockSize/16 bytes and at the very end
+		if (retval == E_PASS) {
+			if  ( ((writeAddress & (~((blockSize>>4)-1))) == writeAddress) || (numBytes == 0) ) {
+				uart_send_str("NOR Write OK through ");
+				uart_send_hexnum(writeAddress, 8);
+				uart_send_lf();
+				// Refresh blockSize for the block now containing writeAddress
+				if (DiscoverBlockInfo(writeAddress, &blockSize, &blockAddr) != E_PASS) {
+					uart_send_str("Address out of range");
+					return E_FAIL;
+				}
+			}
+		} else {
+			log_info("NOR Write Failed... Aborting");
+			return E_FAIL;
+		}
+	}
+
+	return retval;
+}
diff --git a/nor.h b/nor.h
new file mode 100644
index 0000000..49ccd6c
--- /dev/null
+++ b/nor.h
@@ -0,0 +1,44 @@
+/*
+ * nor.h - NOR flash definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+ 
+#ifndef _NOR_H_
+#define _NOR_H_
+
+#include "common.h"
+#include "davinci.h"
+
+/* Global NOR commands */
+uint32_t NOR_Init (void);
+
+int nor_copy(uint32_t *jump_entry_point);
+
+uint32_t NOR_WriteBytes(uint32_t writeAddress, uint32_t numBytes, uint32_t readAddress);
+uint32_t NOR_GlobalErase(void);
+uint32_t NOR_Erase(uint32_t start_address, uint32_t size);
+uint32_t DiscoverBlockInfo(uint32_t address,uint32_t* blockSize, uint32_t* blockAddr);
+
+
+uint32_t nor_get_flashbase(void);
+
+
+#endif /* _NOR_H_ */
diff --git a/norboot.c b/norboot.c
new file mode 100644
index 0000000..816582c
--- /dev/null
+++ b/norboot.c
@@ -0,0 +1,72 @@
+/*
+ * norboot.c - NOR boot mode functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "nor.h"
+#include "util.h"
+#include "uart.h"
+
+extern NOR_INFO gNorInfo;
+
+/* Find the application header in NOR, copy the image to its load address and report its entry point. */
+int
+nor_copy(uint32_t *jump_entry_point)
+{
+	volatile struct nor_boot_t *hdr = 0;
+	volatile uint32_t *appStartAddr = 0;
+	volatile uint32_t *ramPtr = 0;
+	uint32_t blkSize, blkAddress;
+
+	if (NOR_Init() != E_PASS)
+		return E_FAIL;
+
+	/* The header is expected at the start of the block after the one containing flashBase + UBL_IMAGE_SIZE. */
+	if (DiscoverBlockInfo((gNorInfo.flashBase + UBL_IMAGE_SIZE), &blkSize,
+			      &blkAddress) != E_PASS)
+		return E_FAIL; /* blkSize/blkAddress would be uninitialized */
+	hdr = (volatile struct nor_boot_t *) (blkAddress + blkSize);
+
+	/* Check for valid magic number (the low byte is ignored here). */
+	if ((hdr->magicNum & 0xFFFFFF00) != MAGIC_NUMBER_VALID) {
+		log_fail("No valid header found");
+		return E_FAIL;
+	}
+
+	/* The image data starts right after the header */
+	appStartAddr = (uint32_t *)(((uint8_t*) hdr) + sizeof(struct nor_boot_t));
+
+	if (hdr->magicNum == UBL_MAGIC_BIN_IMG) {
+		log_fail("Unsupported image format");
+		return E_FAIL;
+	}
+
+	ramPtr = (uint32_t *) hdr->ldAddress;
+
+	/* Copy data to RAM */
+	memcpy(ramPtr, appStartAddr, hdr->appSize);
+
+	/* Application was read correctly, so set entrypoint */
+	*jump_entry_point = hdr->entryPoint;
+
+	return E_PASS;
+}
diff --git a/uart.c b/uart.c
new file mode 100644
index 0000000..7baefb9
--- /dev/null
+++ b/uart.c
@@ -0,0 +1,287 @@
+/*
+ * uart.c - UART Rx and Tx functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "uart.h"
+#include "util.h"
+#include "crc.h"
+#include "gunzip.h"
+
+/* Symbol from linker script */
+extern uint32_t __DDR_FREE; /* Start of free DDR memory region. */
+extern uint32_t __DDR_END;  /* Last DDR memory address. */
+
+/* Receive count bytes from UART0 into dest; E_PASS, E_TIMEOUT or E_FAIL (line error). */
+static int
+uart_recv_bytes(size_t count, uint8_t *dest)
+{
+	uint32_t i, status = 0;
+	uint32_t timerStatus = 1;
+
+	for (i = 0; i < count; i++) {
+		/* The timer is restarted for every byte, so the timeout is per byte */
+		timer0_start();
+		do {
+			status = (UART0->LSR)&(0x01);
+			timerStatus = timer0_status();
+		} while (!status && timerStatus);
+
+		if (timerStatus == 0) {
+			host_msg("UART_TIMEOUT");
+			return E_TIMEOUT;
+		}
+
+		/* Receive byte */
+		dest[i] = (UART0->RBR) & 0xFF;
+
+		/* Any of the LSR bits in mask 0x1C set is treated as a receive error */
+		if ((UART0->LSR & 0x1C) != 0) {
+			host_msg("UART_RXERR");
+			return E_FAIL;
+		}
+	}
+	return E_PASS;
+}
+
+/* Send the characters of string on UART0; the terminating NUL is not sent. */
+static void
+uart_send_bytes(char *string)
+{
+	uint32_t status = 0;
+	int32_t i, count;
+	uint32_t timerStatus = 1;
+
+	count = strlen(string);
+
+	for (i = 0; i < count; i++) {
+		/* The timer is restarted for every byte, so the timeout is per byte */
+		timer0_start();
+		do {
+			status = (UART0->LSR)&(0x20);
+			timerStatus = timer0_status();
+		} while (!status && timerStatus);
+
+		if (timerStatus == 0)
+			return; /* E_TIMEOUT: the rest of the string is silently dropped */
+
+		/* Send byte */
+		(UART0->THR) = string[i];
+	}
+}
+
+/* Expect `string` (plus its NUL when include_null is set) to arrive on the UART. */
+static int
+uart_check_string(char *string, int include_null)
+{
+	int remaining = strlen(string);
+	char *expected = string;
+	uint8_t byte_in;
+
+	/* Optionally match the terminating NUL as well */
+	if (include_null != false)
+		remaining++;
+
+	/* Compare byte by byte; stop at the first receive error or mismatch */
+	while (remaining-- > 0) {
+		if (uart_recv_bytes(1, &byte_in) != E_PASS)
+			return E_FAIL;
+
+		if (byte_in != *expected++)
+			return E_FAIL;
+	}
+	return E_PASS;
+}
+
+/* Receive a uint32 value sent as 8 ASCII hex digits, most significant digit first */
+static int
+uart_recv_hex_uint32(uint32_t *data)
+{
+	int k;
+	uint8_t recv[8];
+	uint32_t temp;
+	const int num_ascii_char = 8;
+
+	/* Get 8 bytes from UART */
+	if (uart_recv_bytes(num_ascii_char, recv) != E_PASS)
+		return E_FAIL;
+
+	*data = 0;
+	/* Fold in one nibble per character; E_FAIL on anything that is not 0-9, a-f or A-F */
+	for (k = 0; k < num_ascii_char; k++) {
+		temp = recv[k];
+		if (temp >= '0' && temp <= '9')
+			temp -= '0';
+		else if (temp >= 'a' && temp <= 'f')
+			temp -= 'a' - 10;
+		else if (temp >= 'A' && temp <= 'F')
+			temp -= 'A' - 10;
+		else
+			return E_FAIL; /* a stray value would corrupt neighbouring nibbles */
+		*data = (*data << 4) | temp;
+	}
+	return E_PASS;
+}
+
+/* Send an end of line to UART: carriage return followed by line feed ("\r\n"). */
+void
+uart_send_lf(void)
+{
+	uart_send_bytes("\r\n");
+}
+
+/* Send a NUL-terminated string to UART as is: no end of line is appended. */
+void
+uart_send_str(char *string)
+{
+	uart_send_bytes(string);
+}
+
+/* Send a NUL-terminated string to UART, followed by the "\r\n" from uart_send_lf(). */
+void
+uart_send_str_lf(char *string)
+{
+	uart_send_bytes(string);
+	uart_send_lf();
+}
+
+/* Send "0x" followed by the low `digits` (clamped to 0..8) upper-case hex digits of value. */
+void
+uart_send_hexnum(uint32_t value, int digits)
+{
+	char seq[9]; /* 8 digits + NUL */
+	int i;
+
+	if (digits < 0)
+		digits = 0;
+	else if (digits > 8)
+		digits = 8; /* seq[] and a 32-bit value hold at most 8 nibbles */
+
+	for (i = 0; i < digits; i++)
+		seq[i] = "0123456789ABCDEF"[(value >> ((digits - 1 - i) * 4)) & 0x0F];
+	seq[digits] = 0;
+
+	uart_send_str("0x");
+	uart_send_bytes(seq);
+}
+
+/* Wait for the "    CMD" token (with its NUL), then read the 8-digit hex command. */
+int
+uart_get_cmd(uint32_t *boot_cmd)
+{
+	/* '||' short-circuits, so the value is only read after the token matched. */
+	if (uart_check_string("    CMD", true) != E_PASS ||
+	    uart_recv_hex_uint32(boot_cmd) != E_PASS)
+		return E_FAIL;
+
+	return E_PASS;
+}
+
+uint32_t
+uart_get_prog(struct uart_ack_header_t *uart_ack_header)
+{
+	uint32_t error;
+	uint32_t recv_crc, computed_crc;
+	unsigned long inflate_dstbuf_len, inflate_srcbuf_len;
+	uint8_t *ddr_free = (uint8_t *) &__DDR_FREE;
+	/* Receive a gzip image over UART, check its CRC, inflate it; E_PASS or E_FAIL. */
+	uart_ack_header->recv_buffer = ddr_free;
+	uart_ack_header->inflate_dstbuf = ddr_free + MAX_IMAGE_SIZE;
+	inflate_dstbuf_len = ((uint8_t *) &__DDR_END) + 1 - uart_ack_header->inflate_dstbuf;
+
+	/* Wait for the "    ACK" token (with its NUL) from the host */
+	error = uart_check_string("    ACK", true);
+	if (error != E_PASS)
+		return E_FAIL;
+
+	/* Get the ACK header elements, then the "0000" that closes the header */
+	error =  uart_recv_hex_uint32(&uart_ack_header->magic);
+	error |= uart_recv_hex_uint32(&recv_crc);
+	error |= uart_recv_hex_uint32(&uart_ack_header->size);
+	error |= uart_recv_hex_uint32(&uart_ack_header->entry_point);
+	error |= uart_check_string("0000", false);
+	if (error != E_PASS)
+		return E_FAIL;
+
+	uart_send_str("Magic = ");
+	uart_send_hexnum(uart_ack_header->magic, 8);
+	uart_send_str(", CRC = ");
+	uart_send_hexnum(recv_crc, 8);
+	uart_send_str(", Entry = ");
+	uart_send_hexnum(uart_ack_header->entry_point, 8);
+	uart_send_str(", Size = ");
+	uart_send_hexnum(uart_ack_header->size, 8);
+	uart_send_lf();
+
+	/* Size must exceed the gzip offset skipped below (else the subtraction wraps) and fit the buffer */
+	if ((uart_ack_header->size <= GUNZIP_COMP_BLOCK_OFFSET) ||
+	    (uart_ack_header->size > MAX_IMAGE_SIZE)) {
+		host_msg("BADCNT");
+		return E_FAIL;
+	}
+
+	/* Send BEGIN command */
+	host_msg("BEGIN");
+
+	/* Receive the data over UART */
+	if (uart_recv_bytes(uart_ack_header->size,
+			    uart_ack_header->recv_buffer)
+	    != E_PASS) {
+		return E_FAIL;
+	}
+
+	/* Return first DONE when all data arrives */
+	host_msg("DONE");
+
+	computed_crc = crc32_dv_compute(uart_ack_header->recv_buffer,
+					uart_ack_header->size);
+	if (computed_crc != recv_crc) {
+		host_msg("BADCRC");
+		return E_FAIL;
+	}
+
+	inflate_srcbuf_len = uart_ack_header->size - GUNZIP_COMP_BLOCK_OFFSET;
+
+	error = gunzip(uart_ack_header->inflate_dstbuf, &inflate_dstbuf_len,
+		       &uart_ack_header->recv_buffer[GUNZIP_COMP_BLOCK_OFFSET],
+		       &inflate_srcbuf_len);
+	if (error != 0) {
+		uart_send_str("gzip error = ");
+		uart_send_hexnum(error, 8);
+		uart_send_lf();
+		host_msg("GZIPERR");
+		return E_FAIL;
+	}
+
+	uart_send_str("Deflated data size = ");
+	uart_send_hexnum(inflate_dstbuf_len, 8);
+	uart_send_lf();
+	/* From here on the header describes the inflated image, not the received one */
+	uart_ack_header->recv_buffer = uart_ack_header->inflate_dstbuf;
+	uart_ack_header->size = inflate_dstbuf_len;
+
+	/* Return DONE when all data is validated */
+	host_msg("DONE");
+
+	return E_PASS;
+}
diff --git a/uart.h b/uart.h
new file mode 100644
index 0000000..b9f0316
--- /dev/null
+++ b/uart.h
@@ -0,0 +1,48 @@
+/*
+ * uart.h - UART Rx and Tx definitions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _UART_H_
+#define _UART_H_
+
+#include "common.h"
+
+struct uart_ack_header_t {
+	uint32_t magic;           /* magic number received in the ACK header */
+	uint32_t entry_point;     /* entry point received in the ACK header */
+	uint32_t size;            /* received size; replaced by the inflated size on success */
+	uint8_t  *recv_buffer;    /* receive buffer; repointed to inflate_dstbuf on success */
+	uint8_t  *inflate_dstbuf; /* destination buffer for the inflated image */
+};
+
+void uart_boot(uint32_t *jump_entry_point);
+
+void uart_send_lf(void);
+void uart_send_str(char *string);
+void uart_send_str_lf(char *string);
+void uart_send_hexnum(uint32_t value, int digits);
+
+int uart_get_cmd(uint32_t *boot_cmd);
+
+uint32_t uart_get_prog(struct uart_ack_header_t *uart_ack_header);
+
+#endif /* _UART_H_ */
diff --git a/uartboot.c b/uartboot.c
new file mode 100644
index 0000000..f0437c0
--- /dev/null
+++ b/uartboot.c
@@ -0,0 +1,261 @@
+/*
+ * uartboot.c - UART boot mode
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "uart.h"
+#include "util.h"
+#include "crc.h"
+#if defined(FLASH_TYPE_NOR)
+#include "nor.h"
+#elif defined(FLASH_TYPE_NAND)
+#include "nand.h"
+#endif
+
+/* Symbols from linker script */
+extern uint32_t __DDR_START;
+extern uint32_t __DDR_SIZE;
+
+/* Ramp and pattern read-back test over the DDR range; 0 on pass, -1 on fail */
+static int
+ddr_memory_test(void)
+{
+	volatile uint32_t *ddr_start = &__DDR_START;
+	const uint32_t ddr_size32 = ((uint32_t) &__DDR_SIZE) / 4;
+	uint32_t k, expected, read32;
+
+	log_info("DDR tests");
+
+	log_info("1. RAMP test:");
+	for (k = 0; k < ddr_size32; k++)
+		ddr_start[k] = k; /* Write */
+	for (k = 0; k < ddr_size32; k++) {
+		expected = k; /* Each word was written with its own index */
+		read32 = ddr_start[k]; /* Read */
+		if (read32 != expected)
+			goto error;
+	}
+	log_info("  Success");
+
+	log_info("2. PATTERN test:");
+	for (k = 0; k < ddr_size32; k++)
+		ddr_start[k] = DDR_TEST_PATTERN; /* Write */
+	expected = DDR_TEST_PATTERN;
+	for (k = 0; k < ddr_size32; k++) {
+		read32 = ddr_start[k]; /* Read */
+		if (read32 != expected)
+			goto error;
+	}
+	log_info("  Success");
+
+	host_msg("DDRTEST_SUCCESS");
+	return 0;
+
+error:
+	uart_send_str("Failed at address: ");
+	uart_send_hexnum((uint32_t) &ddr_start[k], 8); /* absolute address */
+	uart_send_str(", Expected: ");
+	uart_send_hexnum(expected, 8); /* value the failing test wrote */
+	uart_send_str(", Read: ");
+	uart_send_hexnum(read32, 8);
+	uart_send_lf();
+	host_msg("DDRTEST_FAILURE");
+	return -1;
+}
+
+/* Serve one UART host command; sets *jump_entry_point (0 means reset) */
+void
+uart_boot(uint32_t *jump_entry_point)
+{
+#if defined(FLASH_TYPE_NAND)
+	struct nand_image_descriptor_t im_desc;
+#elif defined(FLASH_TYPE_NOR)
+	struct nor_boot_t norBoot;
+	uint32_t blkAddress, blkSize, baseAddress;
+#endif
+	struct uart_ack_header_t uart_ack_header;
+	uint32_t boot_cmd;
+
+	crc32_dv_build_table();
+	log_info("Starting UART Boot");
+	host_msg("BOOTPSP");
+
+	/* Get the BOOT command */
+	if (uart_get_cmd(&boot_cmd) != E_PASS)
+		goto uartboot_error;
+
+	/* Set the entry point to reset by default */
+	*jump_entry_point = 0x0;
+
+	switch (boot_cmd) {
+	case UBL_CMD_DDR_TEST:
+		/* Perform DDR memory testing. */
+		ddr_memory_test();
+		break;
+
+		/* Download via UART UBL and APP and burn to flash. */
+	case UBL_CMD_FLASH_UBL_APP:
+		host_msg("SENDUBL");
+
+		/* Download UBL into GZIP format */
+		if (uart_get_prog(&uart_ack_header) != E_PASS)
+			goto uartboot_error;
+
+		log_info("Writing UBL");
+
+#if defined(FLASH_TYPE_NOR)
+		NOR_Erase(nor_get_flashbase(), uart_ack_header.size);
+
+		/* Write binary UBL to NOR flash. */
+		NOR_WriteBytes(nor_get_flashbase(), uart_ack_header.size,
+			       (uint32_t) uart_ack_header.recv_buffer);
+#elif defined(FLASH_TYPE_NAND)
+		im_desc.magic = uart_ack_header.magic;
+		im_desc.entry_point = uart_ack_header.entry_point;
+		im_desc.block_num = START_UBL_BLOCK_NUM;
+		im_desc.load_address = 0; /* Load address not used by RBL */
+
+		if (nand_write_prog(&im_desc, uart_ack_header.recv_buffer,
+				    uart_ack_header.size) != E_PASS)
+			goto uartboot_error;
+#endif
+
+		/* Indicate that UBL flashing was successful. */
+		host_msg("DONE");
+
+		host_msg("SENDAPP");
+
+		/* Get the application header and data */
+		if (uart_get_prog(&uart_ack_header) != E_PASS)
+			goto uartboot_error;
+
+		log_info("Writing APP");
+
+#if defined(FLASH_TYPE_NOR)
+		/* Erase the NOR flash where header and data will go */
+		DiscoverBlockInfo((nor_get_flashbase() + UBL_IMAGE_SIZE),
+				  &blkSize, &blkAddress);
+		baseAddress = blkAddress + blkSize;
+		NOR_Erase(baseAddress, uart_ack_header.size + sizeof(norBoot));
+
+		/* MagicFlag for Application (binary or safe) */
+		norBoot.magicNum = uart_ack_header.magic;
+
+		/* Bytes of application (either srec or binary) */
+		norBoot.appSize = uart_ack_header.size;
+
+		/* Value from ACK header */
+		norBoot.entryPoint = uart_ack_header.entry_point;
+
+		/* Semi-hardcoded load address to entry point. FIXME */
+		norBoot.ldAddress = uart_ack_header.entry_point;
+
+		/* Write the struct nor_boot_t header to the flash */
+		NOR_WriteBytes(baseAddress, sizeof(norBoot),
+			       (uint32_t) &norBoot);
+
+		/* Write the application data to the flash */
+		NOR_WriteBytes((baseAddress + sizeof(norBoot)),
+			       uart_ack_header.size,
+			       (uint32_t) uart_ack_header.recv_buffer);
+
+		/* Semi-hardcoded load address to entry point. FIXME */
+		if (nor_write_prog(&nor_boot, uart_ack_header.recv_buffer,
+				   uart_ack_header.size,
+				   baseAddress + sizeof(norBoot),
+				   uart_ack_header.magic,
+				   uart_ack_header.entry_point,
+				   uart_ack_header.entry_point) != E_PASS)
+			goto uartboot_error;
+#elif defined(FLASH_TYPE_NAND)
+		im_desc.magic = uart_ack_header.magic;
+		im_desc.entry_point = uart_ack_header.entry_point;
+		im_desc.block_num = START_APP_BLOCK_NUM;
+		/* Assuming load address is identical to entry point. */
+		im_desc.load_address = uart_ack_header.entry_point;
+
+		if (nand_write_prog(&im_desc, uart_ack_header.recv_buffer,
+				    uart_ack_header.size) != E_PASS)
+			goto uartboot_error;
+#endif
+
+		/* Indicate that APP flashing was successful. */
+		host_msg("DONE");
+
+		break;
+
+	case UBL_CMD_FLASH_DATA:
+		host_msg("SENDDATA");
+		/* Get the data block infos and actual bytes */
+		if (uart_get_prog(&uart_ack_header) != E_PASS)
+			goto uartboot_error;
+
+		log_info("Writing DATA");
+
+		im_desc.magic = uart_ack_header.magic;
+		im_desc.block_num = uart_ack_header.entry_point; /* Block in flash */
+		im_desc.entry_point = 0; /* Do not pass uninitialized stack data */
+		im_desc.load_address = 0;
+
+		if (nand_write_prog(&im_desc, uart_ack_header.recv_buffer,
+				    uart_ack_header.size) != E_PASS)
+			goto uartboot_error;
+
+		/* Indicate that DATA flashing was successful. */
+		host_msg("DONE");
+		break;
+
+	case UBL_CMD_FLASH_ERASE:
+		log_info("Erasing whole flash");
+
+#if defined(FLASH_TYPE_NOR)
+		if (NOR_GlobalErase() != E_PASS) {
+			log_info("Erase failed");
+			goto uartboot_error;
+		}
+#elif defined(FLASH_TYPE_NAND)
+		if (nand_erase_all() != E_PASS) {
+			log_info("Erase failed");
+			goto uartboot_error;
+		}
+#endif
+
+		log_info("Erase successfull");
+
+		break;
+	default:
+		/* Load and run application */
+		host_msg("SENDAPP");
+
+		if (uart_get_prog(&uart_ack_header) != E_PASS)
+			goto uartboot_error;
+
+		*jump_entry_point = uart_ack_header.entry_point;
+		break;
+	} /* end switch statement */
+	return;
+
+uartboot_error:
+	/* Set the entry point to reset. */
+	*jump_entry_point = 0x0;
+}
diff --git a/ubl.c b/ubl.c
new file mode 100644
index 0000000..0e7e1ed
--- /dev/null
+++ b/ubl.c
@@ -0,0 +1,251 @@
+/*
+ * ubl.c - main file
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "common.h"
+#include "davinci.h"
+#include "uart.h"
+#include "util.h"
+#if defined(FLASH_TYPE_NOR)
+#include "nor.h"
+#elif defined(FLASH_TYPE_NAND)
+#include "nand.h"
+#endif
+
+#define C1_IC	(1 << 12) /* icache off/on */
+
+static uint32_t jump_entry_point;
+
+enum bootmode_t bootmode;
+
+/* Return the current value of CP15 register c1 (control register) */
+static uint32_t
+read_p15_c1(void)
+{
+	uint32_t ctrl;
+
+	__asm__ __volatile__(
+		"mrc    p15, 0, %0, c1, c0, 0   @ read control reg\n"
+		: "=r" (ctrl)
+		:
+		: "memory");
+
+	return ctrl;
+}
+
+/* Store value into CP15 register c1 (control register), then read it back */
+static void
+write_p15_c1(uint32_t value)
+{
+	__asm__ __volatile__(
+		"mcr    p15, 0, %0, c1, c0, 0   @ write it back\n"
+		:
+		: "r" (value)
+		: "memory");
+
+	(void) read_p15_c1(); /* read-back; the result is not used */
+}
+
+/* Short busy-wait; copro seems to need a gap between reading and writing */
+static void
+cp_delay(void)
+{
+	volatile int spin = 0;
+
+	while (spin < 100)
+		spin++;
+}
+
+/* Turn on the instruction cache by setting C1_IC in the control register */
+static void
+icache_enable(void)
+{
+	const uint32_t ctrl = read_p15_c1();
+
+	cp_delay(); /* pause between the coprocessor read and the write */
+	write_p15_c1(ctrl | C1_IC);
+}
+
+static int
+ubl_main(void)
+{
+	int status;
+
+	/* Read boot mode: bits 7:6 of the BOOTCFG register */
+	bootmode = (enum bootmode_t) (((SYSTEM->BOOTCFG) & 0xC0) >> 6);
+
+	/* Wait until the RBL is done using the UART (LSR bit 0x40 set). */
+	if (bootmode == NON_SECURE_UART)
+		while ((UART0->LSR & 0x40) == 0);
+
+	status = davinci_platform_init(UBL_VERSION_STR);
+	if (status != E_PASS)
+		goto error;
+
+#if defined(FLASH_TYPE_NOR)
+	status = NOR_Init();
+#elif defined(FLASH_TYPE_NAND)
+	status = nand_init();
+#endif
+	if (status != E_PASS) {
+		uart_send_str("flash init failed");
+		goto error;
+	}
+
+	uart_send_str("BootMode = ");
+
+	/* Select Boot Mode; a failed flash boot falls back to UART boot */
+	switch (bootmode) {
+#if defined(FLASH_TYPE_NAND)
+	case NON_SECURE_NAND:
+		log_info("NAND"); /* Report boot mode to host */
+
+		/* Copy binary application data from NAND to DDRAM */
+		if (nand_copy(&jump_entry_point) != E_PASS) {
+			log_info("Boot failed.");
+			goto UARTBOOT;
+		}
+		break;
+#elif defined(FLASH_TYPE_NOR)
+	case NON_SECURE_NOR:
+		log_info("NOR"); /* Report boot mode to host */
+
+		/* Copy binary application data from NOR to DDRAM */
+		if (nor_copy() != E_PASS) {
+			log_info("Boot failed.");
+			goto UARTBOOT;
+		}
+		break;
+#endif
+	case NON_SECURE_UART:
+		log_info("UART"); /* Report boot mode to host */
+		goto UARTBOOT;
+		break;
+	default:
+UARTBOOT:
+		uart_boot(&jump_entry_point);
+		break;
+	}
+
+	waitloop(10000);
+
+	/* Wait for LSR bit 0x40, then disable the UART timeout timer */
+	while ((UART0->LSR & 0x40) == 0)
+		;
+	TIMER0->TCR = 0x00000000;
+
+	return E_PASS;
+
+error:
+	jump_entry_point = 0; /* Reset */
+	return E_FAIL;
+}
+
+/*
+ * boot() has naked attribute (doesn't save registers since it is the entry
+ * point out of boot and it doesn't have an exit point). This setup requires
+ * that the gnu compiler uses the -nostdlib option.
+ */
+__attribute__((naked, section(".boot"))) void boot(void);
+
+void
+boot(void)
+{
+	void (*app_entry_function)(void);
+	extern uint32_t __topstack; /* symbol defined in linker script */
+	register uint32_t *stackpointer asm("sp");
+
+	asm(" MRS	r0, cpsr");
+	asm(" BIC	r0, r0, #0x1F"); /* Clear MODES */
+	asm(" ORR	r0, r0, #0x13"); /* Set SUPERVISOR mode */
+	asm(" ORR	r0, r0, #0xC0"); /* Disable FIQ and IRQ */
+	asm(" MSR	cpsr, r0");
+
+	/* Set the IVT to low memory, leave MMU & caches disabled */
+	asm(" MRC	p15, 0, r1, c1, c0, 0");
+	asm(" BIC	r1, r1, #0x00002000"); /* Clear bit 13 in r1, not r0 */
+	asm(" MCR	p15, 0, r1, c1, c0, 0");
+
+	/* Stack setup */
+	stackpointer = &(__topstack);
+
+	icache_enable();
+
+	/* Call to main code; it leaves the target in jump_entry_point */
+	ubl_main();
+
+	uart_send_str("Starting app at: ");
+	uart_send_hexnum((uint32_t) jump_entry_point, 8);
+	uart_send_lf();
+
+	/* Jump to entry point */
+	app_entry_function = (void *) jump_entry_point;
+	(*app_entry_function)();
+}
+
+/*
+ * selfcopy() has naked attribute (doesn't save registers since it is the
+ * entry point when the UBL is found at the base of the NOR Flash and then
+ * goes directly to the boot() function, which is also naked). This setup
+ * requires that the gnu compiler uses the -nostdlib option.
+ */
+#if defined(FLASH_TYPE_NOR)
+__attribute__((naked, section(".selfcopy"))) void selfcopy(void);
+
+void
+selfcopy(void)
+{
+	extern uint32_t __selfcopysrc, __selfcopydest, __selfcopydestend;
+	volatile uint32_t *src = &(__selfcopysrc);
+	volatile uint32_t *dest = &(__selfcopydest);
+	volatile uint32_t *destend = &(__selfcopydestend);
+
+	/* Enable ITCM */
+	asm(" MRC	p15, 0, r0, c9, c1, 1");
+	asm(" MOV	r0, #0x1");
+	asm(" MCR	p15, 0, r0, c9, c1, 1");
+
+	/* Enable DTCM */
+	asm(" MRC	p15, 0, r0, c9, c1, 0");
+	asm(" MOV	r0, #0x8000");
+	asm(" ORR	r0, r0, #0x1");
+	asm(" MCR	p15, 0, r0, c9, c1, 0");
+
+	/* Copy 32-bit words from src until dest reaches destend */
+	while (dest < destend) {
+		*dest = *src;
+		dest++;
+		src++;
+	}
+
+	/* Jump to the normal entry point */
+	boot();
+}
+
+__attribute__ ((naked, section(".fakeentry"))) void fake_entry(void);
+
+void
+fake_entry(void)
+{
+	boot(); /* only forwards to boot(), which has no exit point */
+}
+#endif /* FLASH_TYPE_NOR */
diff --git a/ubl.lds b/ubl.lds
new file mode 100644
index 0000000..f6af652
--- /dev/null
+++ b/ubl.lds
@@ -0,0 +1,103 @@
+/*
+ * ubl.lds - UBL linker script file
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+ENTRY(boot)
+SECTIONS {
+	/* Common definitions (__DRAM_* and __DDR_SIZE are not defined here) */
+	__EMIF_START  = 0x02000000;
+	__EMIF_SIZE   = 0x02000000;
+	__IRAM_START  = 0x00000000;
+	__DDR_START   = 0x80000000;
+
+	STACKStart = __DRAM_START + __DRAM_SIZE;	
+	__topstack = (__DRAM_START + __DRAM_SIZE) - 0x4; /* sp set in boot() */
+  
+	. = __EMIF_START;
+
+	__CS2start = .;
+	__NORFlash = .;
+	__NANDFlash = .;
+
+	.selfcopy : AT (0x0)
+	{
+		*(.selfcopy)		
+		. = ALIGN(256);
+	}
+	
+	__selfcopysrc = .; /* source pointer of the copy loop in selfcopy() */
+	__IVT = __DRAM_START;
+	__selfcopydest = __DRAM_START + 0x00000020; /* copy destination */
+
+	. = 0x20;
+	.fakeentry : AT ( LOADADDR(.selfcopy) + SIZEOF(.selfcopy))
+	{
+		*(.fakeentry)
+		. = ALIGN(4);
+	}
+		
+	.text		: AT ( LOADADDR(.fakeentry) + SIZEOF(.fakeentry) )
+	{
+		*(.text)
+		. = ALIGN(4);
+	}
+	.boot		: AT ( LOADADDR(.text) + SIZEOF(.text))
+	{
+		*(.boot)
+		. = ALIGN(4);
+	}
+
+        /* Move into DRAM for placing const and data sections */
+        . += (__DRAM_START - __IRAM_START);
+
+	.rodata		: AT ( LOADADDR(.boot) + SIZEOF(.boot) )
+	{
+		*(.rodata*)
+		*(.rodata)
+		. = ALIGN(4);
+	}	
+
+	.data		: AT ( LOADADDR(.rodata) + SIZEOF(.rodata) )
+	{
+		*(.data)
+		. = ALIGN(4);
+	}
+			
+	__selfcopydestend = __selfcopydest + SIZEOF(.fakeentry) +
+						SIZEOF(.text) + SIZEOF(.boot) +
+						SIZEOF(.data) + SIZEOF(.rodata); /* copy end */
+	
+	.bss		:
+	{
+		*(.bss) *(COMMON)
+		. = ALIGN(4);
+	}
+
+	/* DDR2: .ddrram is NOLOAD, so it is not part of the loaded image */
+	. = __DDR_START;
+	.ddrram (NOLOAD) :
+	{
+		*(.ddrram)
+	}
+	__DDR_FREE = .;
+        __DDR_END  = __DDR_START + __DDR_SIZE - 1;
+}
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..2031126
--- /dev/null
+++ b/util.c
@@ -0,0 +1,83 @@
+/*
+ * util.c - miscellaneous functions
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "common.h"
+
+#define MAXSTRLEN 256
+
+/* Replace *data with ENDIAN_SWAP() of its current value */
+void
+endian_data(uint32_t *data)
+{
+	const uint32_t word = *data;
+
+	*data = ENDIAN_SWAP(word);
+}
+
+/*
+ * Copy n bytes from src to dest, one byte at a time; returns dest.
+ */
+void *
+memcpy(void *dest, const void *src, size_t n)
+{
+	const uint8_t *from = src;
+	uint8_t *to = dest;
+
+	while (n-- > 0)
+		*to++ = *from++;
+
+	return dest;
+}
+
+/* Length of s, or (size_t) -1 if no NUL in the first MAXSTRLEN chars */
+size_t
+strlen(const char *s)
+{
+	size_t len = 0;
+
+	/* Test the bound first so that s[MAXSTRLEN] is never read */
+	while ((len < MAXSTRLEN) && (s[len] != 0))
+		len++;
+
+	if (len == MAXSTRLEN)
+		return (size_t) -1;
+	return len;
+}
+
+/* Busy-wait: one NOP per count; returns at once if loopcnt <= 0 */
+void
+waitloop(int32_t loopcnt)
+{
+	for (; 0 < loopcnt; --loopcnt)
+		__asm__ __volatile__("   NOP");
+}
+
+void
+sleep_ms(int ms)
+{
+	for (; 0 < ms; --ms)
+		waitloop(20000); /* one 20000-count wait loop per ms step */
+}
diff --git a/util.h b/util.h
new file mode 100644
index 0000000..e36bb47
--- /dev/null
+++ b/util.h
@@ -0,0 +1,35 @@
+/*
+ * util.h - miscellaneous functions prototypes
+ *
+ * Copyright (C) 2008 Hugo Villeneuve <hugo@hugovil.com>
+ *
+ * Based on TI DaVinci Flash and Boot Utilities, original copyright follows:
+ *   Copyright 2008 Texas Instruments, Inc. <www.ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _UTIL_H_
+#define _UTIL_H_
+
+#include <stdint.h>
+#include <string.h> /* For memcpy & memset prototypes */
+
+void endian_data(uint32_t *data);
+
+void waitloop(int32_t loopcnt);
+void sleep_ms(int ms);
+
+#endif /* _UTIL_H_ */