Index: trunk/nv.lua
===================================================================
--- trunk/nv.lua	(revision 483)
+++ trunk/nv.lua	(revision 484)
@@ -88,4 +88,12 @@
 	links { "nv-core", "nv-lib", "nv-lua" }
 
+project "nv-image"
+	location (_ACTION.."/"..NV_RUNTIME)
+	language "C++"
+	kind "StaticLib"
+	includedirs { "." }
+	files { "nv/image/**.hh", "src/image/**.cc" }
+	links { "nv-core" }
+
 project "nv-formats"
 	location (_ACTION.."/"..NV_RUNTIME)
Index: trunk/nv/core/ascii_printer.hh
===================================================================
--- trunk/nv/core/ascii_printer.hh	(revision 484)
+++ trunk/nv/core/ascii_printer.hh	(revision 484)
@@ -0,0 +1,42 @@
+// Copyright (C) 2013-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+/**
+* @file ascii_printer.hh
+* @author Kornel Kisielewicz epyon@chaosforge.org
+* @brief ASCII printer interface
+*/
+
+#ifndef NV_CORE_ASCII_PRINTER_HH
+#define NV_CORE_ASCII_PRINTER_HH
+
+#include <nv/common.hh>
+#include <nv/stl/string.hh>
+#include <nv/core/position.hh>
+#include <nv/interface/terminal.hh>
+
+namespace nv
+{
+	class ascii_printer
+	{
+	public:
+		explicit ascii_printer( terminal * term );
+
+		void print( uchar8 ch, const position& p, uint32 color )
+		{
+			m_terminal->print( p, color, ch );
+		}
+		void print( const string_view& text, const position& p, uint32 color );
+//		void print( const string_view& text, const position& p, uint32 color, const rectangle& scissor );
+		void frame( const nv::rectangle& area, const nv::string_view& border_chars, uint32 color );
+
+	protected:
+		terminal * m_terminal;
+	};
+
+}
+
+#endif // NV_CORE_ASCII_PRINTER_HH
Index: trunk/nv/core/resource.hh
===================================================================
--- trunk/nv/core/resource.hh	(revision 483)
+++ trunk/nv/core/resource.hh	(revision 484)
@@ -17,4 +17,5 @@
 #include <nv/stl/string.hh>
 #include <nv/stl/hash_store.hh>
+#include <nv/stl/vector.hh>
 
 
@@ -31,19 +32,25 @@
 	class resource_handler
 	{
-
-	protected:
-		virtual const void* lock( resource_id id, resource_type_id type_hash, bool checked = true ) = 0;
-//		virtual const void* create( resource_id id, resource_type_id type_hash ) = 0;
-		virtual void unlock( resource_id id, resource_type_id type_hash ) = 0;
-		virtual void release( resource_id id, resource_type_id type_hash ) = 0;
+	public:
+		resource_handler( resource_type_id id ) : m_type_id( id ) {}
+
+	protected:
+		virtual const void* raw_lock( resource_id id, bool checked = true ) = 0;
+		virtual void raw_add( resource_id id, void* value ) = 0;
+		virtual void unlock( resource_id id ) = 0;
+		virtual void release( resource_id id ) = 0;
+		virtual bool exists( resource_id id ) = 0;
+		virtual bool load_resource( const string_view& id ) = 0;
 
 		template< typename T >
 		const T* lock( resource_id id, bool checked = true )
 		{
-			return reinterpret_cast< const T* >( lock( id, shash64( rtti_type_hash<T>::hash() ), checked ) );
+			NV_ASSERT( rtti_type_hash<T>::hash() == m_type_id.value(), "Wrong type lock!" );
+			return reinterpret_cast< const T* >( raw_lock( id, checked ) );
 		}
 		template< typename T >
 		resource< T > create( resource_id id )
 		{
+			NV_ASSERT( rtti_type_hash<T>::hash() == m_type_id.value(), "Wrong type creation!" );
 			resource< T > result;
 			result.m_id = id;
@@ -52,9 +59,5 @@
 		}
 
-// 		template< typename T >
-// 		resource< T > create( resource_id id )
-// 		{
-// 			return reinterpret_cast<const T*>( create( id, rtti_type_hash<T>::hash() ) );
-// 		}
+
 
 		template < typename T >
@@ -64,5 +67,7 @@
 		template < typename T >
 		friend class resource_lock;
-//		friend class resource_manager;
+		friend class resource_manager;
+
+		resource_type_id m_type_id;
 	};
 
@@ -79,5 +84,5 @@
 		~resource()
 		{
-			if ( m_handler ) m_handler->release( m_id, shash64( rtti_type_hash<T>::hash() ) );
+			if ( m_handler ) m_handler->release( m_id );
 		}
 	protected:
@@ -156,5 +161,5 @@
 			, m_handler( r.m_handler )
 			, m_resource( r.m_handler->lock<T>( r.m_id ) ) {}
-		explicit resource_lock( const resource_handle< T >& r, resource_handler* handler ) : m_id( r.m_id ), m_handler( handler ), m_resource( handler->lock( r.m_id, shash64( hash_value ) ) ) {}
+		explicit resource_lock( const resource_handle< T >& r, resource_handler* handler ) : m_id( r.m_id ), m_handler( handler ), m_resource( handler->raw_lock( r.m_id ) ) {}
 		resource_lock( const resource_lock& ) = delete;
 		resource_lock& operator=( const resource_lock& other ) = delete;
@@ -170,5 +175,5 @@
 			if ( this != &other )
 			{
-				if ( m_handler ) m_handler->unlock( m_id, shash64( rtti_type_hash<T>::value ) );
+				if ( m_handler ) m_handler->unlock( m_id );
 				m_id       = other.m_id;
 				m_handler  = other.m_handler;
@@ -184,5 +189,5 @@
 		~resource_lock()
 		{
-			if ( m_handler ) m_handler->unlock( m_id, shash64( rtti_type_hash<T>::value ) );
+			if ( m_handler ) m_handler->unlock( m_id );
 		}
 	private:
@@ -193,58 +198,67 @@
 
 
-// 	class resource_manager : public resource_handler
-// 	{
-// 	public:
-// 		template < typename T >
-// 		shash64 register_resource_handler( resource_handler* handler )
-// 		{
-// 			return register_resource_handler( handler, rtti_type_hash<T>::hash() );
-// 		}
-// 
-// 		~resource_manager()
-// 		{
-// 			for ( auto p : m_handlers )
-// 			{
-// 				delete p.second;
-// 			}
-// 		}
-// 	protected:
-// 		virtual const void* lock( resource_id id, resource_type_id type_hash )
-// 		{
-// 			auto handler = m_handlers.find( type_hash );
-// 			NV_ASSERT( handler != m_handlers.end(), "Handler not registered!" );
-// 			return handler->second->lock( id, type_hash );
-// 		}
-// 
-// // 		virtual const void* create( resource_id id, resource_type_id type_hash )
-// // 		{
-// // 			auto handler = m_handlers.find( type_hash );
-// // 			NV_ASSERT( handler != m_handlers.end(), "Handler not registered!" );
-// // 			return handler->second->create( id, type_hash );
-// // 		}
-// 
-// 		virtual void unlock( resource_id id, resource_type_id type_hash )
-// 		{
-// 			auto handler = m_handlers.find( type_hash );
-// 			NV_ASSERT( handler != m_handlers.end(), "Handler not registered!" );
-// 			handler->second->unlock( id, type_hash );
-// 		}
-// 
-// 		virtual void release( resource_id id, resource_type_id type_hash )
-// 		{
-// 			auto handler = m_handlers.find( type_hash );
-// 			NV_ASSERT( handler != m_handlers.end(), "Handler not registered!" );
-// 			handler->second->release( id, type_hash );
-// 		}
-// 
-// 		resource_type_id register_resource_handler( resource_handler* handler, resource_type_id type_hash )
-// 		{
-// 			NV_ASSERT( m_handlers.find( type_hash ) == m_handlers.end(), "Handler already registered!" );
-// 			m_handlers[type_hash] = handler;
-// 		}
-// 
-// 	protected:
-// 		hash_store< resource_type_id, resource_handler* > m_handlers;
-// 	};
+	class resource_manager
+	{
+	public:
+		template < typename T, typename Manager >
+		Manager* register_resource_handler( Manager* handler )
+		{
+			return static_cast<Manager*>( register_resource_handler( handler, resource_type_id( rtti_type_hash<T>::value ) ) );
+		}
+
+		template < typename T >
+		resource< T > get( const string_view& id )
+		{
+			auto m = m_handlers.find( resource_type_id( rtti_type_hash<T>::value ) );
+			NV_ASSERT( m != m_handlers.end(), "Resource type unrecognized!" );
+			if ( m->second->exists( id ) || m->second->load_resource( id ) )
+				return m->second->create< T >( id );
+			// NV_ASSERT( false, "resource_manager.get failed!" );
+			return resource< T >();
+		}
+
+		template < typename T >
+		resource< T > get( uint64 id )
+		{
+			auto m = m_handlers.find( resource_type_id( rtti_type_hash<T>::value ) );
+			NV_ASSERT( m != m_handlers.end(), "Resource type unrecognized!" );
+			if ( m->second->exists( id ) )
+			{
+				return m->second->create< T >( id );
+			}
+			// NV_ASSERT( false, "resource_manager.get failed!" );
+			return resource< T >();
+		}
+
+		template < typename T >
+		resource< T > add( shash64 id, T* value )
+		{
+			auto m = m_handlers.find( resource_type_id( rtti_type_hash<T>::value ) );
+			NV_ASSERT( m != m_handlers.end(), "Resource type unrecognized!" );
+			m->second->raw_add( id, value );
+			return m->second->create< T >( id );
+		}
+
+		virtual ~resource_manager()
+		{
+			// reverse destruction order
+			for ( auto i = m_handler_list.rbegin(); i != m_handler_list.rend(); ++i )
+				delete *i;
+		}
+	protected:
+
+		resource_handler* register_resource_handler( resource_handler* handler, resource_type_id type_hash )
+		{
+			NV_ASSERT( m_handlers.find( type_hash ) == m_handlers.end(), "Handler already registered!" );
+			m_handlers[type_hash] = handler;
+			m_handler_list.push_back( handler );
+			return handler;
+		}
+
+	protected:
+		// needed for destruction order
+		vector< resource_handler* > m_handler_list;
+		hash_store< resource_type_id, resource_handler* > m_handlers;
+	};
 }
 
Index: trunk/nv/engine/animation.hh
===================================================================
--- trunk/nv/engine/animation.hh	(revision 484)
+++ trunk/nv/engine/animation.hh	(revision 484)
@@ -0,0 +1,162 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+/**
+ * @file animation.hh
+ * @author Kornel Kisielewicz
+ * @brief animation
+ */
+
+#ifndef NV_ENGINE_ANIMATION_HH
+#define NV_ENGINE_ANIMATION_HH
+
+#include <nv/common.hh>
+#include <nv/core/resource.hh>
+#include <nv/interface/mesh_data.hh>
+#include <nv/gfx/skeleton_instance.hh>
+
+namespace nv
+{
+	class animator_bind_data;
+	class animator_data;
+}
+
+NV_RTTI_DECLARE_NAME( nv::mesh_nodes_data, "nv::mesh_nodes_data" )
+NV_RTTI_DECLARE_NAME( nv::animator_bind_data, "nv::animator_bind_data" )
+NV_RTTI_DECLARE_NAME( nv::animator_data, "nv::animator_data" )
+
+namespace nv
+{
+
+	class animator_clip_data
+	{
+	public:
+		animator_clip_data( resource< mesh_nodes_data > data, frame_range frange )
+			: m_data( data )
+			, m_range( frange )
+		{
+		}
+		// 	uint32 get_start_frame() const { return m_range.start; }
+		// 	uint32 get_end_frame() const { return m_range.end; }
+		// 	uint32 get_frame_count() const { return m_range.duration(); }
+		// 	bool is_looping() const { return m_range.is_looping; }
+		resource< mesh_nodes_data > get_data() const { return m_data; }
+		const frame_range& get_range() const { return m_range; }
+	protected:
+		resource< mesh_nodes_data > m_data;
+		frame_range                 m_range;
+	};
+
+	class animator_data
+	{
+	public:
+		animator_data() {}
+		const animator_clip_data* get_clip( shash64 name ) const
+		{
+			auto it = m_clip_data.find( name );
+			return ( it != m_clip_data.end() ? &it->second : nullptr );
+		}
+		bool has_clip( shash64 name ) const
+		{
+			auto it = m_clip_data.find( name );
+			return it != m_clip_data.end();
+		}
+		void add_clip( shash64 name, animator_clip_data&& entry )
+		{
+			m_clip_data.assign( name, entry );
+		}
+	protected:
+		hash_store< shash64, animator_clip_data > m_clip_data;
+	};
+
+	// resource per animator/model set
+	class animator_bind_data
+	{
+	public:
+		animator_bind_data( const data_node_list& bones )
+			: m_bone_list( bones.get_name() )
+		{
+			// TODO: m_bone_list.clone
+			for ( auto bone : bones )
+			{
+				m_bone_list.append( bone );
+			}
+			m_bone_transforms.prepare( m_bone_list );
+		}
+		void add_binding( shash64 id, const mesh_nodes_data* nodes )
+		{
+			m_bind_data[id].prepare( nodes, m_bone_list );
+		}
+
+		bool has_binding( shash64 id ) const { return m_bind_data.find( id ) != m_bind_data.end(); }
+		const bone_transforms& get_bone_transforms() const { return m_bone_transforms; }
+		const skeleton_binding& get_binding( shash64 id ) const
+		{
+			return m_bind_data.at( id );
+		}
+	protected:
+		hash_store< shash64, skeleton_binding > m_bind_data;
+		bone_transforms                         m_bone_transforms;
+		data_node_list                          m_bone_list;
+	};
+
+	// per model instance
+	class animator_instance
+	{
+	public:
+		animator_instance() {}
+		void initialize( resource< animator_data > data, resource< animator_bind_data > bind_data )
+		{
+			m_data = data;
+			m_bind_data = bind_data;
+		}
+		bool is_valid() const { return m_data.is_valid() && m_bind_data.is_valid(); }
+		bool has_clip( shash64 id ) const
+		{
+			if ( auto data = m_data.lock() )
+				return data->has_clip( id );
+			else
+				return false;
+		}
+		mat4 get_transform( uint32 idx ) const
+		{
+			if ( idx < m_skeleton.size() ) return m_skeleton.transforms()[idx];
+			return mat4();
+		}
+		const skeleton_instance& get_skeleton() const
+		{
+			return m_skeleton;
+		}
+
+		void update( shash64 clip, uint32 time )
+		{
+			if ( is_valid() )
+			{
+				auto data = m_data.lock();
+				auto bind_data = m_bind_data.lock();
+				const animator_clip_data* clip_data = data->get_clip( clip );
+				if ( clip_data )
+				{
+					if ( auto nodes = clip_data->get_data().lock() )
+					{
+						float fframe = clip_data->get_range().frame_from_ms_fps( time, nodes->get_fps() );
+						m_transforms.animate( &*nodes, bind_data->get_binding( clip ), fframe );
+					}
+				}
+				m_skeleton.assign( m_transforms, bind_data->get_bone_transforms() );
+			}
+		}
+	protected:
+		resource< animator_data >      m_data;
+		resource< animator_bind_data > m_bind_data;
+		skeleton_transforms            m_transforms;
+		skeleton_instance              m_skeleton;
+	};
+
+
+}
+
+#endif // NV_ENGINE_ANIMATION_HH
Index: trunk/nv/engine/image_manager.hh
===================================================================
--- trunk/nv/engine/image_manager.hh	(revision 484)
+++ trunk/nv/engine/image_manager.hh	(revision 484)
@@ -0,0 +1,38 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+/**
+* @file image_manager.hh
+* @author Kornel Kisielewicz
+* @brief image_manager
+*/
+
+#ifndef NV_ENGINE_IMAGE_MANAGER_HH
+#define NV_ENGINE_IMAGE_MANAGER_HH
+
+#include <nv/common.hh>
+#include <nv/core/resource.hh>
+#include <nv/interface/image_data.hh>
+#include <nv/engine/resource_system.hh>
+
+namespace nv
+{
+
+	NV_RTTI_DECLARE_NAME( image_data, "image_data" )
+
+	class image_manager : public manual_resource_manager< image_data >
+	{
+	public:
+		image_manager() {}
+	protected:
+		virtual bool load_resource( const string_view& id );
+	};
+
+
+
+}
+
+#endif // NV_ENGINE_IMAGE_MANAGER_HH
Index: trunk/nv/engine/material_manager.hh
===================================================================
--- trunk/nv/engine/material_manager.hh	(revision 484)
+++ trunk/nv/engine/material_manager.hh	(revision 484)
@@ -0,0 +1,69 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+/**
+* @file material_manager.hh
+* @author Kornel Kisielewicz
+* @brief material_manager
+*/
+
+#ifndef NV_ENGINE_MATERIAL_MANAGER_HH
+#define NV_ENGINE_MATERIAL_MANAGER_HH
+
+#include <nv/common.hh>
+#include <nv/core/resource.hh>
+#include <nv/interface/device.hh>
+#include <nv/interface/mesh_data.hh>
+#include <nv/gfx/skeleton_instance.hh>
+#include <nv/engine/resource_system.hh>
+#include <nv/engine/image_manager.hh>
+
+namespace nv
+{
+
+	struct material
+	{
+		string128 paths[ 8 ];
+	};
+
+	struct gpu_material
+	{
+		texture textures[ 8 ];
+	};
+
+	NV_RTTI_DECLARE_NAME( gpu_material, "gpu_material" )
+	NV_RTTI_DECLARE_NAME( material, "material" )
+
+	class material_manager : public lua_resource_manager< material >
+	{
+	public:
+		virtual string_view get_storage_name() const { return "materials"; }
+		virtual string_view get_resource_name() const { return "material"; }
+	protected:
+		virtual bool load_resource( lua::table_guard& table, shash64 id );
+	private:
+	};
+
+	class gpu_material_manager : public manual_resource_manager< gpu_material >
+	{
+	public:
+		gpu_material_manager( context* context, material_manager* matmgr, image_manager* imgmgr )
+			: m_context( context )
+			, m_material_manager( matmgr )
+			, m_image_manager( imgmgr )
+		{}
+	protected:
+		virtual bool load_resource( const string_view& id );
+		virtual void release( gpu_material* m );
+	private:
+		context*          m_context;
+		material_manager* m_material_manager;
+		image_manager*    m_image_manager;
+	};
+
+}
+
+#endif // NV_ENGINE_MATERIAL_MANAGER_HH
Index: trunk/nv/engine/mesh_manager.hh
===================================================================
--- trunk/nv/engine/mesh_manager.hh	(revision 484)
+++ trunk/nv/engine/mesh_manager.hh	(revision 484)
@@ -0,0 +1,61 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+/**
+ * @file mesh_manager.hh
+ * @author Kornel Kisielewicz
+ * @brief mesh_manager
+ */
+
+#ifndef NV_ENGINE_MESH_MANAGER_HH
+#define NV_ENGINE_MESH_MANAGER_HH
+
+#include <nv/common.hh>
+#include <nv/core/resource.hh>
+#include <nv/interface/mesh_data.hh>
+#include <nv/gfx/skeleton_instance.hh>
+#include <nv/engine/resource_system.hh>
+
+
+namespace nv
+{
+
+	enum shader_type
+	{
+		NORMAL = 0,
+		BONE = 1,
+	};
+
+	struct gpu_mesh
+	{
+		vertex_array va;
+		uint32       count;
+		shader_type  shader;
+	};
+
+	NV_RTTI_DECLARE_NAME( data_channel_set, "data_channel_set" )
+	NV_RTTI_DECLARE_NAME( gpu_mesh, "gpu_mesh" )
+
+	using mesh_manager = manual_resource_manager< data_channel_set >;
+
+	class gpu_mesh_manager : public nv::manual_resource_manager< gpu_mesh >
+	{
+	public:
+		gpu_mesh_manager( context* context, mesh_manager* mesh_manager )
+			: m_context( context )
+			, m_mesh_manager( mesh_manager ) {}
+		virtual bool load_resource( const string_view& id );
+		resource< gpu_mesh > load_resource( resource< data_channel_set > mesh );
+	protected:
+		virtual void release( gpu_mesh* m );
+	private:
+		context*      m_context;
+		mesh_manager* m_mesh_manager;
+	};
+
+}
+
+#endif // NV_ENGINE_MESH_MANAGER_HH
Index: trunk/nv/engine/resource_system.hh
===================================================================
--- trunk/nv/engine/resource_system.hh	(revision 483)
+++ trunk/nv/engine/resource_system.hh	(revision 484)
@@ -33,4 +33,5 @@
 		static void free( T* value ) { delete value; }
 		static T* to_pointer( T* value ) { return value; }
+		static T* to_stored( void* value ) { return reinterpret_cast<T*>( value ); }
 	};
 
@@ -41,23 +42,13 @@
 		static void free( T ) {}
 		static T* to_pointer( T& value ) { return &value; }
+		static T to_stored( void* value ) { return *reinterpret_cast<T*>( value ); }
 	};
 
 	class resource_system;
 
-	class custom_resource_manager_base : public resource_handler
-	{
-	public:
-		custom_resource_manager_base() {}
-		virtual void clear() = 0;
-		virtual bool load_resource( const string_view& id ) = 0;
-	protected:
-		virtual void unlock( resource_id, resource_type_id ) {};
-		virtual void release( resource_id, resource_type_id ) {};
-	};
-
 	class lua_resource_manager_base : public resource_handler
 	{
 	public:
-		lua_resource_manager_base() : m_lua( nullptr ) {}
+		lua_resource_manager_base( resource_type_id id ) : resource_handler( id ), m_lua( nullptr ) {}
 		void initialize( lua::state* state );
 		virtual string_view get_storage_name() const = 0;
@@ -65,32 +56,26 @@
 		virtual void clear() = 0;
 		void load_all();
-		bool load_resource( const string_view& id );
+		virtual bool load_resource( const string_view& id );
 		virtual ~lua_resource_manager_base() {}
 	protected:
 		virtual bool load_resource( lua::table_guard& table, shash64 id ) = 0;
-		//		virtual const void* lock( resource_id id, resource_type_id );
-		virtual void unlock( resource_id, resource_type_id ) {};
-		virtual void release( resource_id, resource_type_id ) {};
-
 		lua::state* m_lua;
 	};
 
-	template < typename T, bool Heap = true, typename Base = custom_resource_manager_base >
+	template < typename T, bool Heap = true, typename Base = resource_handler >
 	class custom_resource_manager : public Base
 	{
 	public:
 		typedef resource_storage_policy< T, Heap > policy_type;
+		typedef Base                       base_type;
 		typedef T                          value_type;
 		typedef resource< T >              resource_type;
 		typedef typename policy_type::type stored_type;
 
-		custom_resource_manager() {}
+		custom_resource_manager() : base_type( resource_type_id( rtti_type_hash<T>::hash() ) ) {}
 		resource_type get( const string_view& id )
 		{
-			auto m = m_store.find( id );
-			if ( m != m_store.end() )
-			{
+			if ( exists( id ) )
 				return create< T >( id );
-			}
 			else
 			{
@@ -106,9 +91,5 @@
 		resource_type get( uint64 id )
 		{
-			auto m = m_store.find( shash64( id ) );
-			if ( m != m_store.end() )
-			{
-				return create< T >( shash64( id ) );
-			}
+			if ( exists( shash64( id ) ) ) return create< T >( shash64( id ) );
 			// NV_ASSERT( false, "resource_manager.get failed!" );
 			return resource_type();
@@ -130,4 +111,5 @@
 		}
 	protected:
+
 		resource_type add( shash64 id, stored_type resource )
 		{
@@ -141,5 +123,16 @@
 		}
 
-		virtual const void* lock( resource_id id, resource_type_id, bool )
+		virtual bool exists( resource_id id )
+		{
+			return m_store.find( shash64( id ) ) != m_store.end();
+		}
+
+		virtual void raw_add( resource_id id, void* value )
+		{
+			add( id, policy_type::to_stored( value ) );
+		}
+
+
+		virtual const void* raw_lock( resource_id id, bool )
 		{
 			auto m = m_store.find( id );
@@ -147,4 +140,6 @@
 		}
 
+		virtual void unlock( resource_id ) {};
+		virtual void release( resource_id ) {};
 		virtual void release( stored_type ) {}
 
Index: trunk/nv/gfx/skeleton_instance.hh
===================================================================
--- trunk/nv/gfx/skeleton_instance.hh	(revision 483)
+++ trunk/nv/gfx/skeleton_instance.hh	(revision 484)
@@ -86,5 +86,9 @@
 		void assign( const data_node_list* node_data );
 		void assign( const skeleton_transforms& other );
-		void interpolate( const skeleton_transforms& a, const skeleton_transforms& b, float t );
+		void interpolate_linear( const skeleton_transforms& a, const skeleton_transforms& b, float t );
+		void interpolate_nlerp( const skeleton_transforms& a, const skeleton_transforms& b, float t );
+		void interpolate_slerp( const skeleton_transforms& a, const skeleton_transforms& b, float t );
+		void interpolate4( const skeleton_transforms& s1, const skeleton_transforms& v1, const skeleton_transforms& v2, const skeleton_transforms& s2, float t );
+		void interpolate_squad( const skeleton_transforms& s1, const skeleton_transforms& v1, const skeleton_transforms& v2, const skeleton_transforms& s2, float t );
 		void animate_local( const mesh_nodes_data* node_data, const skeleton_binding& binding, float frame );
 		void animate( const mesh_nodes_data* node_data, const skeleton_binding& binding, float frame, bool local = false )
Index: trunk/nv/image/miniz.hh
===================================================================
--- trunk/nv/image/miniz.hh	(revision 484)
+++ trunk/nv/image/miniz.hh	(revision 484)
@@ -0,0 +1,23 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+#ifndef NV_IMAGE_MINIZ_HH
+#define NV_IMAGE_MINIZ_HH
+
+#include <nv/common.hh>
+#include <nv/interface/image_loader.hh>
+#include <nv/interface/image_data.hh>
+#include <nv/stl/stream.hh>
+
+namespace nv
+{
+
+	void *miniz_decompress( const void *source_buf, size_t source_buf_len, size_t *out_len, bool parse_header );
+
+
+}
+
+#endif // NV_IMAGE_MINIZ_HH
Index: trunk/nv/image/png_loader.hh
===================================================================
--- trunk/nv/image/png_loader.hh	(revision 484)
+++ trunk/nv/image/png_loader.hh	(revision 484)
@@ -0,0 +1,29 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+#ifndef NV_IMAGE_PNG_LOADER_HH
+#define NV_IMAGE_PNG_LOADER_HH
+
+#include <nv/common.hh>
+#include <nv/interface/image_loader.hh>
+#include <nv/interface/image_data.hh>
+#include <nv/stl/stream.hh>
+
+namespace nv
+{
+
+	class png_loader : public image_loader
+	{
+	public:
+		png_loader();
+		virtual bool get_info( stream&, image_format& format, ivec2& size );
+		virtual image_data* load( stream& );
+		virtual image_data* load( stream&, image_format format );
+	};
+
+}
+
+#endif // NV_IMAGE_PNG_LOADER_HH
Index: trunk/nv/interface/device.hh
===================================================================
--- trunk/nv/interface/device.hh	(revision 483)
+++ trunk/nv/interface/device.hh	(revision 484)
@@ -194,5 +194,5 @@
 		virtual string_view get_shader_header() const = 0;
 
-		virtual texture create_texture( image_data* data, sampler asampler ) 
+		virtual texture create_texture( const image_data* data, sampler asampler ) 
 		{
 			return create_texture( data->get_size(), data->get_format(), asampler, data->get_data() );
Index: trunk/nv/interface/image_loader.hh
===================================================================
--- trunk/nv/interface/image_loader.hh	(revision 484)
+++ trunk/nv/interface/image_loader.hh	(revision 484)
@@ -0,0 +1,34 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+/**
+* @file image_loader.hh
+* @author Kornel Kisielewicz
+* @brief image_loader
+*/
+
+#ifndef NV_INTERFACE_IMAGE_LOADER_HH
+#define NV_INTERFACE_IMAGE_LOADER_HH
+
+#include <nv/common.hh>
+#include <nv/stl/stream.hh>
+#include <nv/interface/image_data.hh>
+
+namespace nv
+{
+
+	class image_loader
+	{
+	public:
+		virtual bool get_info( stream&, image_format& format, ivec2& size ) = 0;
+		virtual image_data* load( stream& ) = 0;
+		virtual image_data* load( stream&, image_format format ) = 0;
+		virtual ~image_loader() {};
+	};
+
+}
+
+#endif // NV_INTERFACE_IMAGE_LOADER_HH
Index: trunk/nv/io/c_stream.hh
===================================================================
--- trunk/nv/io/c_stream.hh	(revision 483)
+++ trunk/nv/io/c_stream.hh	(revision 484)
@@ -34,4 +34,5 @@
 		virtual bool gets( char* buffer, size_t max_count );
 		virtual bool seek( long offset, origin orig );
+		virtual bool eof();
 		virtual size_t tell();
 		virtual size_t size();
Index: trunk/nv/stl/math/quaternion.hh
===================================================================
--- trunk/nv/stl/math/quaternion.hh	(revision 483)
+++ trunk/nv/stl/math/quaternion.hh	(revision 484)
@@ -17,4 +17,5 @@
 #include <nv/stl/math/constants.hh>
 #include <nv/stl/math/geometric.hh>
+#include <nv/stl/math/exponential.hh>
 #include <nv/stl/math/angle.hh>
 #include <nv/base/cmath.hh>
@@ -150,5 +151,5 @@
 		inline T length( const tquat<T>& q )
 		{
-			return math::sqrt( dot( q, q ) );
+			return nv::sqrt( dot( q, q ) );
 		}
 
@@ -157,5 +158,6 @@
 		{
 			T len = math::length( q );
-			if ( len <= T( 0 ) ) return tquat<T>( 1, 0, 0, 0 );
+			if ( len <= T( 0 ) ) 
+				return tquat<T>( 1, 0, 0, 0 );
 			T rlen = T( 1 ) / len;
 			return tquat<T>( q.w * rlen, q.x * rlen, q.y * rlen, q.z * rlen );
@@ -188,5 +190,5 @@
 			{
 				T angle = nv::acos( cos_theta );
-				return ( sin( ( T( 1 ) - m ) * angle ) * a + sin( m * angle ) * b ) / sin( angle );
+				return ( nv::sin( ( T( 1 ) - m ) * angle ) * a + nv::sin( m * angle ) * b ) / nv::sin( angle );
 			}
 		}
@@ -195,8 +197,12 @@
 		inline tquat<T> lerp( const tquat<T>& a, const tquat<T>& b, T m )
 		{
-			NV_ASSERT( m >= static_cast<T>( 0 ), "Bad argument to lerp!" );
-			NV_ASSERT( m <= static_cast<T>( 1 ), "Bad argument to lerp!" );
-
 			return a * ( T( 1 ) - m ) + ( b * m );
+		}
+
+		template < typename T >
+		inline tquat<T> nlerp( const tquat<T>& a, const tquat<T>& b, T m )
+		{
+			tquat<T> result( a * ( T( 1 ) - m ) + ( b * m ) );
+			return normalize( result );
 		}
 
@@ -224,4 +230,93 @@
 				return ( nv::sin( ( T( 1 ) - m ) * angle ) * x + nv::sin( m * angle ) * z ) / nv::sin( angle );
 			}
+		}
+
+		template < typename T >
+		inline tquat<T> weighted_slerp( const tquat<T>& x, const tquat<T>& y, T m, T m1 )
+		{
+			tquat<T> z = y;
+			T cos_theta = dot( x, y );
+			if ( cos_theta < T( 0 ) )
+			{
+				z = -y;
+				cos_theta = -cos_theta;
+			}
+			if ( cos_theta > T( 1 ) - epsilon<T>() )
+			{
+				return tquat<T>(
+					x.w * m + z.w * m1,
+					x.x * m + z.x * m1,
+					x.y * m + z.y * m1,
+					x.z * m + z.z * m1 );
+			}
+			else
+			{
+				T angle = nv::acos( cos_theta );
+				return ( nv::sin( m1 * angle ) * x + nv::sin( m * angle ) * z ) / nv::sin( angle );
+			}
+		}
+
+
+		template < typename T >
+		inline tquat<T> exp( const tquat<T>& q )
+		{
+			tvec3<T> u( q.x, q.y, q.z );
+			T angle = math::length( u );
+			if ( angle < epsilon<T>() )
+				return tquat<T>();
+
+			tvec3<T> v( u / angle );
+			return tquat<T>( nv::cos( angle ), nv::sin( angle ) * v );
+		}
+
+		template < typename T >
+		inline tquat<T> pow( const tquat<T>& x, const T& y )
+		{
+			if ( abs( x.w ) > ( static_cast<T>( 1 ) - epsilon<T>() ) )
+				return x;
+			T angle = acos( y );
+			T nangle = angle * y;
+			T div = sin( nangle ) / sin( angle );
+			return normalize( tquat<T>(
+				cos( nangle ),
+				x.x * div,
+				x.y * div,
+				x.z * div ) );
+		}
+		template < typename T >
+		inline tquat<T> log( const tquat<T>& q )
+		{
+			tvec3<T> u( q.x, q.y, q.z );
+			T veclen = length( u );
+
+			if ( veclen < epsilon<T>() )
+			{
+				if ( q.w > T( 0 ) )
+					return tquat<T>( nv::log( q.w ), T( 0 ), T( 0 ), T( 0 ) );
+				else if ( q.w < T( 0 ) )
+					return tquat<T>( nv::log( -q.w ), pi<T>(), T( 0 ), T( 0 ) );
+				else
+					return tquat<T>( INFINITY, INFINITY, INFINITY, INFINITY );
+			}
+			else
+			{
+				T quatlen = nv::sqrt( veclen * veclen + q.w * q.w );
+				T t = nv::atan2( veclen, T( q.w ) ) / veclen;
+				return tquat<T>( nv::log( quatlen ), t * q.x, t * q.y, t * q.z );
+			}
+		}
+
+		template < typename T >
+		tquat<T> intermediate( const tquat<T>& prev, const tquat<T>& curr, const tquat<T>& next )
+		{
+			tquat<T> inv = inverse( curr );
+			return curr * math::exp( ( math::log( inv * next ) + math::log( inv * prev ) ) / T( -4 ) );
+		}
+
+
+		template <typename T>
+		inline tquat<T> squad( const tquat<T>& q1, const tquat<T>& q2, const tquat<T>& s1, const tquat<T>& s2, const T& h )
+		{
+			return slerp( slerp( q1, q2, h ), slerp( s1, s2, h ), T( 2 ) * ( T( 1 ) - h ) * h );
 		}
 
Index: trunk/nv/stl/stream.hh
===================================================================
--- trunk/nv/stl/stream.hh	(revision 483)
+++ trunk/nv/stl/stream.hh	(revision 484)
@@ -36,4 +36,5 @@
 		virtual bool gets( char* buffer, size_t max_count ) = 0;
 		virtual bool seek( long offset, origin orig ) = 0;
+		virtual bool eof() = 0;
 		virtual size_t tell() = 0;
 		virtual size_t size() = 0;
Index: trunk/src/engine/image_manager.cc
===================================================================
--- trunk/src/engine/image_manager.cc	(revision 484)
+++ trunk/src/engine/image_manager.cc	(revision 484)
@@ -0,0 +1,24 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+#include "nv/engine/image_manager.hh"
+
+#include "nv/image/png_loader.hh"
+#include "nv/io/c_file_system.hh"
+
+using namespace nv;
+
+bool image_manager::load_resource( const string_view& filename )
+{
+	png_loader loader;
+	c_file_system fs;
+	stream* file = fs.open( filename ); // NOTE(review): presumably returns nullptr on missing file - confirm
+	image_data* result = file ? loader.load( *file ) : nullptr; // guard: never dereference a null stream
+	delete file;
+	if ( result ) add( filename, result ); // register only successfully loaded images
+	return result != nullptr;
+}
+
Index: trunk/src/engine/material_manager.cc
===================================================================
--- trunk/src/engine/material_manager.cc	(revision 484)
+++ trunk/src/engine/material_manager.cc	(revision 484)
@@ -0,0 +1,47 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+#include "nv/engine/material_manager.hh"
+
+#include "nv/io/c_file_system.hh"
+#include "nv/image/png_loader.hh"
+
+using namespace nv;
+
+bool gpu_material_manager::load_resource( const string_view& id )
+{
+	if ( auto mat = m_material_manager->get( id ).lock() )
+	{
+		gpu_material* result = new gpu_material;
+		sampler smp( sampler::LINEAR, sampler::REPEAT );
+		for ( uint32 i = 0; i < size( mat->paths ); ++i )
+			if ( !mat->paths[i].empty() )
+				if ( auto data = m_image_manager->get( mat->paths[i] ).lock() )
+					result->textures[i] = m_context->get_device()->create_texture( &*data, smp );
+		add( id, result );
+		return true;
+	}
+	return false;
+}
+
+void gpu_material_manager::release( gpu_material* m )
+{
+	for ( const texture& t : m->textures )
+	{
+		m_context->get_device()->release( t );
+	}
+}
+
+bool material_manager::load_resource( nv::lua::table_guard& table, nv::shash64 id )
+{
+	material* m = new material;
+	m->paths[ TEX_DIFFUSE ]  = table.get_string128( "diffuse" );
+	m->paths[ TEX_SPECULAR ] = table.get_string128( "specular" );
+	m->paths[ TEX_NORMAL ]   = table.get_string128( "normal" );
+	m->paths[ TEX_GLOSS ]    = table.get_string128( "gloss" );
+	add( id, m );
+	return true;
+}
Index: trunk/src/engine/mesh_manager.cc
===================================================================
--- trunk/src/engine/mesh_manager.cc	(revision 484)
+++ trunk/src/engine/mesh_manager.cc	(revision 484)
@@ -0,0 +1,43 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+#include "nv/engine/mesh_manager.hh"
+
+using namespace nv;
+
+resource< gpu_mesh > gpu_mesh_manager::load_resource( resource< data_channel_set > mesh )
+{
+	resource< gpu_mesh > result = get( mesh.id().value() );
+	if ( result ) return result;
+	if ( auto lmesh = mesh.lock() )
+	{
+		gpu_mesh* gm = new gpu_mesh;
+		gm->va = m_context->create_vertex_array( &*lmesh, STATIC_DRAW );
+		gm->count = lmesh->get_channel_size( slot::INDEX );
+		gm->shader = lmesh->get_channel( slot::BONEINDEX ) != nullptr ? BONE : NORMAL;
+		return add( mesh.id(), gm );
+	}
+	return resource< gpu_mesh >();
+}
+
+bool nv::gpu_mesh_manager::load_resource( const string_view& id )
+{
+	if ( auto lmesh = m_mesh_manager->get( id ).lock() )
+	{
+		gpu_mesh* gm = new gpu_mesh;
+		gm->va = m_context->create_vertex_array( &*lmesh, STATIC_DRAW );
+		gm->count = lmesh->get_channel_size( slot::INDEX );
+		gm->shader = lmesh->get_channel( slot::BONEINDEX ) != nullptr ? BONE : NORMAL;
+		add( id, gm );
+		return true;
+	}
+	return false;
+}
+
+void gpu_mesh_manager::release( gpu_mesh* m )
+{
+	m_context->release( m->va );
+}
Index: trunk/src/gfx/skeleton_instance.cc
===================================================================
--- trunk/src/gfx/skeleton_instance.cc	(revision 483)
+++ trunk/src/gfx/skeleton_instance.cc	(revision 484)
@@ -52,5 +52,8 @@
 	for ( uint32 n = 0; n < skeleton.size(); ++n )
 	{
-		m_matrix[n] = transforms[n].extract() * bones.m_offsets[n];
+ 		transform tr( bones.m_offsets[n] );
+ 		tr.set_orientation( normalize( tr.get_orientation() ) );
+ 		m_matrix[n] = ( transforms[n] * tr ).extract();
+	//	m_matrix[n] = transforms[n].extract() * bones.m_offsets[n];
 	}
 }
@@ -74,5 +77,5 @@
 }
 
-void nv::skeleton_transforms::interpolate( const skeleton_transforms& a, const skeleton_transforms& b, float t )
+void nv::skeleton_transforms::interpolate_linear( const skeleton_transforms& a, const skeleton_transforms& b, float t )
 {
 	NV_ASSERT( a.size() == b.size(), "!!!" );
@@ -81,6 +84,117 @@
 	for ( uint32 n = 0; n < a.size(); ++n )
 	{
+		m_transforms[n] = transform(
+			math::mix( a.m_transforms[n].get_position(), b.m_transforms[n].get_position(), t ),
+			math::lerp( a.m_transforms[n].get_orientation(), b.m_transforms[n].get_orientation(), t )
+			);
+	}
+
+	if ( m_transforms.size() > 0 )
+		m_transforms[0] = nv::interpolate( a.m_transforms[0], b.m_transforms[0], t );
+}
+
+void nv::skeleton_transforms::interpolate_nlerp( const skeleton_transforms& a, const skeleton_transforms& b, float t )
+{
+	NV_ASSERT( a.size() == b.size(), "!!!" );
+	if ( m_transforms.size() != a.size() )
+		m_transforms.resize( a.size() );
+
+	for ( uint32 n = 0; n < a.size(); ++n )
+	{
+		m_transforms[n] = transform(
+			math::mix( a.m_transforms[n].get_position(), b.m_transforms[n].get_position(), t ),
+			math::nlerp( a.m_transforms[n].get_orientation(), b.m_transforms[n].get_orientation(), t )
+			);
+	}
+
+	if ( m_transforms.size() > 0 )
+		m_transforms[0] = nv::interpolate( a.m_transforms[0], b.m_transforms[0], t );
+}
+
+
+void nv::skeleton_transforms::interpolate_slerp( const skeleton_transforms& a, const skeleton_transforms& b, float t )
+{
+	NV_ASSERT( a.size() == b.size(), "!!!" );
+	if ( m_transforms.size() != a.size() )
+		m_transforms.resize( a.size() );
+	for ( uint32 n = 0; n < a.size(); ++n )
+	{
 		m_transforms[n] = nv::interpolate( a.m_transforms[n], b.m_transforms[n], t );
 	}
+}
+
+void nv::skeleton_transforms::interpolate4( const skeleton_transforms& s1, const skeleton_transforms& v1, const skeleton_transforms& v2, const skeleton_transforms& s2, float t )
+{
+	NV_ASSERT( s1.size() == s2.size(), "!!!" );
+	NV_ASSERT( v1.size() == v2.size(), "!!!" );
+	NV_ASSERT( s1.size() == v1.size(), "!!!" );
+	if ( m_transforms.size() != s1.size() )
+		m_transforms.resize( s1.size() );
+	float interp_squared = t*t;
+	float interp_cubed = interp_squared*t;
+	float weights[4];
+	weights[0] = 0.5f * ( -interp_cubed + 2.0f * interp_squared - t );
+	weights[1] = 0.5f * ( 3.0f * interp_cubed - 5.0f * interp_squared + 2.0f );
+	weights[2] = 0.5f * ( -3.0f * interp_cubed + 4.0f * interp_squared + t );
+	weights[3] = 0.5f * ( interp_cubed - interp_squared );
+
+	for ( uint32 n = 0; n < s1.size(); ++n )
+	{
+		quat qs1 = s1.m_transforms[n].get_orientation();
+		quat qs2 = s2.m_transforms[n].get_orientation();
+		quat qv1 = v1.m_transforms[n].get_orientation();
+		quat qv2 = v2.m_transforms[n].get_orientation();
+
+		float a = dot( qv1, qv2 ) > 0.0f ? 1.0f : -1.0f;
+
+		quat qr = weights[0] * qs1 
+				+ weights[1] * (a * qv1 )
+				+ weights[2] * qv2 
+				+ weights[3] * qs2;
+
+		qr = normalize( qr );
+
+ 		if ( n == 0 ) 
+			qr = nv::math::slerp( v1.m_transforms[n].get_orientation(), v2.m_transforms[n].get_orientation(), t );
+
+		m_transforms[n] = transform(
+			weights[0] * s1.m_transforms[n].get_position() +
+			weights[1] * v1.m_transforms[n].get_position() +
+			weights[2] * v2.m_transforms[n].get_position() +
+			weights[3] * s2.m_transforms[n].get_position(),
+			qr
+		);
+	}
+}
+
+
+void nv::skeleton_transforms::interpolate_squad( const skeleton_transforms& s1, const skeleton_transforms& v1, const skeleton_transforms& v2, const skeleton_transforms& s2, float t )
+{
+	NV_ASSERT( s1.size() == s2.size(), "!!!" );
+	NV_ASSERT( v1.size() == v2.size(), "!!!" );
+	NV_ASSERT( s1.size() == v1.size(), "!!!" );
+	if ( m_transforms.size() != s1.size() )
+		m_transforms.resize( s1.size() );
+
+	for ( uint32 n = 0; n < s1.size(); ++n )
+	{
+		nv::quat ss1 = s1.m_transforms[n].get_orientation();
+		nv::quat ss2 = s2.m_transforms[n].get_orientation();
+		nv::quat q = normalize( nv::math::squad(
+			v1.m_transforms[n].get_orientation(),
+			v2.m_transforms[n].get_orientation(),
+			nv::math::intermediate( ss1, v1.m_transforms[n].get_orientation(), v2.m_transforms[n].get_orientation() ),
+			nv::math::intermediate( v1.m_transforms[n].get_orientation(), v2.m_transforms[n].get_orientation(), ss2 ),
+			t ) );
+		if ( n == 0 ) q = nv::math::slerp(
+			v1.m_transforms[n].get_orientation(),
+			v2.m_transforms[n].get_orientation(), t );
+
+		m_transforms[n] = transform(
+			mix( v1.m_transforms[n].get_position(), v2.m_transforms[n].get_position(), t ),
+			q
+			);
+	}
+
 }
 
@@ -101,5 +215,7 @@
 		{
 			if ( node->size() > 0 )
+			{
 				m_transforms[bone_id] = raw_channel_interpolator( node, binding.m_key ).get< transform >( frame );
+			}
 			int confirm_that_not_needed;
 // 			else
@@ -189,10 +305,7 @@
 void nv::bone_transforms::prepare( const data_node_list& bone_data )
 {
-	if ( m_offsets.empty() )
-	{
-		m_offsets.resize( bone_data.size() );
-
-		for ( nv::uint16 bi = 0; bi < bone_data.size(); ++bi )
-			m_offsets[bi] = bone_data[bi].transform;
-	}
-}
+	m_offsets.resize( bone_data.size() );
+
+	for ( nv::uint16 bi = 0; bi < bone_data.size(); ++bi )
+		m_offsets[bi] = bone_data[bi].transform;
+}
Index: trunk/src/image/miniz.cc
===================================================================
--- trunk/src/image/miniz.cc	(revision 484)
+++ trunk/src/image/miniz.cc	(revision 484)
@@ -0,0 +1,2657 @@
+#include "nv/image/miniz.hh"
+#include "nv/core/profiler.hh"
+
+using namespace nv;
+
+#if defined( _M_IX86 ) || defined( _M_X64 ) || defined( __i386__ ) || defined( __i386 ) || defined( __i486__ ) || defined( __i486 ) || defined( i386 ) || defined( __ia64__ ) || defined( __x86_64__ )
+// MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
+#define MINIZ_X86_OR_X64_CPU 1
+#endif
+
+#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU
+// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
+#define MINIZ_LITTLE_ENDIAN 1
+#endif
+
+#if MINIZ_X86_OR_X64_CPU
+// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses.
+#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
+#endif
+
+#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
+// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions).
+#define MINIZ_HAS_64BIT_REGISTERS 1
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	// ------------------- zlib-style API Definitions.
+
+	// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits!
+	typedef unsigned long mz_ulong;
+
+	// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap.
+	void mz_free( void *p );
+
+#define MZ_ADLER32_INIT (1)
+	// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL.
+	mz_ulong mz_adler32( mz_ulong adler, const unsigned char *ptr, size_t buf_len );
+
+#define MZ_CRC32_INIT (0)
+	// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL.
+	mz_ulong mz_crc32( mz_ulong crc, const unsigned char *ptr, size_t buf_len );
+
+	// Compression strategies.
+	enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 };
+
+	// Method
+#define MZ_DEFLATED 8
+
+#ifndef MINIZ_NO_ZLIB_APIS
+
+	// Heap allocation callbacks.
+	// Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long.
+	typedef void *( *mz_alloc_func )( void *opaque, size_t items, size_t size );
+	typedef void( *mz_free_func )( void *opaque, void *address );
+	typedef void *( *mz_realloc_func )( void *opaque, void *address, size_t items, size_t size );
+
+#define MZ_VERSION          "9.1.15"
+#define MZ_VERNUM           0x91F0
+#define MZ_VER_MAJOR        9
+#define MZ_VER_MINOR        1
+#define MZ_VER_REVISION     15
+#define MZ_VER_SUBREVISION  0
+
+	// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs).
+	enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 };
+
+	// Return status codes. MZ_PARAM_ERROR is non-standard.
+	enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 };
+
+	// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
+	enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 };
+
+	// Window bits
+#define MZ_DEFAULT_WINDOW_BITS 15
+
+	struct mz_internal_state;
+
+	// Compression/decompression stream struct.
+	typedef struct mz_stream_s
+	{
+		const unsigned char *next_in;     // pointer to next byte to read
+		unsigned int avail_in;            // number of bytes available at next_in
+		mz_ulong total_in;                // total number of bytes consumed so far
+
+		unsigned char *next_out;          // pointer to next byte to write
+		unsigned int avail_out;           // number of bytes that can be written to next_out
+		mz_ulong total_out;               // total number of bytes produced so far
+
+		char *msg;                        // error msg (unused)
+		struct mz_internal_state *state;  // internal state, allocated by zalloc/zfree
+
+		mz_alloc_func zalloc;             // optional heap allocation function (defaults to malloc)
+		mz_free_func zfree;               // optional heap free function (defaults to free)
+		void *opaque;                     // heap alloc function user pointer
+
+		int data_type;                    // data_type (unused)
+		mz_ulong adler;                   // adler32 of the source or uncompressed data
+		mz_ulong reserved;                // not used
+	} mz_stream;
+
+	typedef mz_stream *mz_streamp;
+
+	// Returns the version string of miniz.c.
+	const char *mz_version( void );
+
+	// mz_deflateInit() initializes a compressor with default options:
+	// Parameters:
+	//  pStream must point to an initialized mz_stream struct.
+	//  level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
+	//  level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio.
+	//  (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
+	// Return values:
+	//  MZ_OK on success.
+	//  MZ_STREAM_ERROR if the stream is bogus.
+	//  MZ_PARAM_ERROR if the input parameters are bogus.
+	//  MZ_MEM_ERROR on out of memory.
+	int mz_deflateInit( mz_streamp pStream, int level );
+
+	// mz_deflateInit2() is like mz_deflate(), except with more control:
+	// Additional parameters:
+	//   method must be MZ_DEFLATED
+	//   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer)
+	//   mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
+	int mz_deflateInit2( mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy );
+
+	// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
+	int mz_deflateReset( mz_streamp pStream );
+
+	// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible.
+	// Parameters:
+	//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
+	//   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH.
+	// Return values:
+	//   MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full).
+	//   MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore.
+	//   MZ_STREAM_ERROR if the stream is bogus.
+	//   MZ_PARAM_ERROR if one of the parameters is invalid.
+	//   MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.)
+	int mz_deflate( mz_streamp pStream, int flush );
+
+	// mz_deflateEnd() deinitializes a compressor:
+	// Return values:
+	//  MZ_OK on success.
+	//  MZ_STREAM_ERROR if the stream is bogus.
+	int mz_deflateEnd( mz_streamp pStream );
+
+	// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH.
+	mz_ulong mz_deflateBound( mz_streamp pStream, mz_ulong source_len );
+
+	// Single-call compression functions mz_compress() and mz_compress2():
+	// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure.
+	int mz_compress( unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len );
+	int mz_compress2( unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level );
+
+	// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress().
+	mz_ulong mz_compressBound( mz_ulong source_len );
+
+	// Initializes a decompressor.
+	int mz_inflateInit( mz_streamp pStream );
+
+	// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer:
+	// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate).
+	int mz_inflateInit2( mz_streamp pStream, int window_bits );
+
+	// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible.
+	// Parameters:
+	//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
+	//   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
+	//   On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster).
+	//   MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data.
+	// Return values:
+	//   MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full.
+	//   MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified.
+	//   MZ_STREAM_ERROR if the stream is bogus.
+	//   MZ_DATA_ERROR if the deflate stream is invalid.
+	//   MZ_PARAM_ERROR if one of the parameters is invalid.
+	//   MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again
+	//   with more input data, or with more room in the output buffer (except when using single call decompression, described above).
+	int mz_inflate( mz_streamp pStream, int flush );
+
+	// Deinitializes a decompressor.
+	int mz_inflateEnd( mz_streamp pStream );
+
+	// Single-call decompression.
+	// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure.
+	int mz_uncompress( unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len );
+
+	// Returns a string description of the specified error code, or NULL if the error code is invalid.
+	const char *mz_error( int err );
+
+	// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports.
+	// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project.
+#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+	typedef unsigned char Byte;
+	typedef unsigned int uInt;
+	typedef mz_ulong uLong;
+	typedef Byte Bytef;
+	typedef uInt uIntf;
+	typedef char charf;
+	typedef int intf;
+	typedef void *voidpf;
+	typedef uLong uLongf;
+	typedef void *voidp;
+	typedef void *const voidpc;
+#define Z_NULL                0
+#define Z_NO_FLUSH            MZ_NO_FLUSH
+#define Z_PARTIAL_FLUSH       MZ_PARTIAL_FLUSH
+#define Z_SYNC_FLUSH          MZ_SYNC_FLUSH
+#define Z_FULL_FLUSH          MZ_FULL_FLUSH
+#define Z_FINISH              MZ_FINISH
+#define Z_BLOCK               MZ_BLOCK
+#define Z_OK                  MZ_OK
+#define Z_STREAM_END          MZ_STREAM_END
+#define Z_NEED_DICT           MZ_NEED_DICT
+#define Z_ERRNO               MZ_ERRNO
+#define Z_STREAM_ERROR        MZ_STREAM_ERROR
+#define Z_DATA_ERROR          MZ_DATA_ERROR
+#define Z_MEM_ERROR           MZ_MEM_ERROR
+#define Z_BUF_ERROR           MZ_BUF_ERROR
+#define Z_VERSION_ERROR       MZ_VERSION_ERROR
+#define Z_PARAM_ERROR         MZ_PARAM_ERROR
+#define Z_NO_COMPRESSION      MZ_NO_COMPRESSION
+#define Z_BEST_SPEED          MZ_BEST_SPEED
+#define Z_BEST_COMPRESSION    MZ_BEST_COMPRESSION
+#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
+#define Z_DEFAULT_STRATEGY    MZ_DEFAULT_STRATEGY
+#define Z_FILTERED            MZ_FILTERED
+#define Z_HUFFMAN_ONLY        MZ_HUFFMAN_ONLY
+#define Z_RLE                 MZ_RLE
+#define Z_FIXED               MZ_FIXED
+#define Z_DEFLATED            MZ_DEFLATED
+#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
+#define alloc_func            mz_alloc_func
+#define free_func             mz_free_func
+#define internal_state        mz_internal_state
+#define z_stream              mz_stream
+#define deflateInit           mz_deflateInit
+#define deflateInit2          mz_deflateInit2
+#define deflateReset          mz_deflateReset
+#define deflate               mz_deflate
+#define deflateEnd            mz_deflateEnd
+#define deflateBound          mz_deflateBound
+#define compress              mz_compress
+#define compress2             mz_compress2
+#define compressBound         mz_compressBound
+#define inflateInit           mz_inflateInit
+#define inflateInit2          mz_inflateInit2
+#define inflate               mz_inflate
+#define inflateEnd            mz_inflateEnd
+#define uncompress            mz_uncompress
+#define crc32                 mz_crc32
+#define adler32               mz_adler32
+#define MAX_WBITS             15
+#define MAX_MEM_LEVEL         9
+#define zError                mz_error
+#define ZLIB_VERSION          MZ_VERSION
+#define ZLIB_VERNUM           MZ_VERNUM
+#define ZLIB_VER_MAJOR        MZ_VER_MAJOR
+#define ZLIB_VER_MINOR        MZ_VER_MINOR
+#define ZLIB_VER_REVISION     MZ_VER_REVISION
+#define ZLIB_VER_SUBREVISION  MZ_VER_SUBREVISION
+#define zlibVersion           mz_version
+#define zlib_version          mz_version()
+#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
+
+#endif // MINIZ_NO_ZLIB_APIS
+
+	// ------------------- Types and macros
+
+	typedef unsigned char mz_uint8;
+	typedef signed short mz_int16;
+	typedef unsigned short mz_uint16;
+	typedef unsigned int mz_uint32;
+	typedef unsigned int mz_uint;
+	typedef long long mz_int64;
+	typedef unsigned long long mz_uint64;
+	typedef int mz_bool;
+
+#define MZ_FALSE (0)
+#define MZ_TRUE (1)
+
+	// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message.
+#ifdef _MSC_VER
+#define MZ_MACRO_END while (0, 0)
+#else
+#define MZ_MACRO_END while (0)
+#endif
+
+	// ------------------- ZIP archive reading/writing
+
+#ifndef MINIZ_NO_ARCHIVE_APIS
+
+	enum
+	{
+		MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024,
+		MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260,
+		MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256
+	};
+
+	typedef struct
+	{
+		mz_uint32 m_file_index;
+		mz_uint32 m_central_dir_ofs;
+		mz_uint16 m_version_made_by;
+		mz_uint16 m_version_needed;
+		mz_uint16 m_bit_flag;
+		mz_uint16 m_method;
+#ifndef MINIZ_NO_TIME
+		time_t m_time;
+#endif
+		mz_uint32 m_crc32;
+		mz_uint64 m_comp_size;
+		mz_uint64 m_uncomp_size;
+		mz_uint16 m_internal_attr;
+		mz_uint32 m_external_attr;
+		mz_uint64 m_local_header_ofs;
+		mz_uint32 m_comment_size;
+		char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];
+		char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];
+	} mz_zip_archive_file_stat;
+
+	typedef size_t( *mz_file_read_func )( void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n );
+	typedef size_t( *mz_file_write_func )( void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n );
+
+	struct mz_zip_internal_state_tag;
+	typedef struct mz_zip_internal_state_tag mz_zip_internal_state;
+
+	typedef enum
+	{
+		MZ_ZIP_MODE_INVALID = 0,
+		MZ_ZIP_MODE_READING = 1,
+		MZ_ZIP_MODE_WRITING = 2,
+		MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
+	} mz_zip_mode;
+
+	typedef struct mz_zip_archive_tag
+	{
+		mz_uint64 m_archive_size;
+		mz_uint64 m_central_directory_file_ofs;
+		mz_uint m_total_files;
+		mz_zip_mode m_zip_mode;
+
+		mz_uint m_file_offset_alignment;
+
+		mz_alloc_func m_pAlloc;
+		mz_free_func m_pFree;
+		mz_realloc_func m_pRealloc;
+		void *m_pAlloc_opaque;
+
+		mz_file_read_func m_pRead;
+		mz_file_write_func m_pWrite;
+		void *m_pIO_opaque;
+
+		mz_zip_internal_state *m_pState;
+
+	} mz_zip_archive;
+
+	typedef enum
+	{
+		MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100,
+		MZ_ZIP_FLAG_IGNORE_PATH = 0x0200,
+		MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400,
+		MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800
+	} mz_zip_flags;
+
+	// ZIP archive reading
+
+	// Inits a ZIP archive reader.
+	// These functions read and validate the archive's central directory.
+	mz_bool mz_zip_reader_init( mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags );
+	mz_bool mz_zip_reader_init_mem( mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags );
+
+#ifndef MINIZ_NO_STDIO
+	mz_bool mz_zip_reader_init_file( mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags );
+#endif
+
+	// Returns the total number of files in the archive.
+	mz_uint mz_zip_reader_get_num_files( mz_zip_archive *pZip );
+
+	// Returns detailed information about an archive file entry.
+	mz_bool mz_zip_reader_file_stat( mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat );
+
+	// Determines if an archive file entry is a directory entry.
+	mz_bool mz_zip_reader_is_file_a_directory( mz_zip_archive *pZip, mz_uint file_index );
+	mz_bool mz_zip_reader_is_file_encrypted( mz_zip_archive *pZip, mz_uint file_index );
+
+	// Retrieves the filename of an archive file entry.
+	// Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename.
+	mz_uint mz_zip_reader_get_filename( mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size );
+
+	// Attempts to locate a file in the archive's central directory.
+	// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH
+	// Returns -1 if the file cannot be found.
+	int mz_zip_reader_locate_file( mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags );
+
+	// Extracts an archive file to a memory buffer using no memory allocation.
+	mz_bool mz_zip_reader_extract_to_mem_no_alloc( mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size );
+	mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size );
+
+	// Extracts an archive file to a memory buffer.
+	mz_bool mz_zip_reader_extract_to_mem( mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags );
+	mz_bool mz_zip_reader_extract_file_to_mem( mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags );
+
+	// Extracts an archive file to a dynamically allocated heap buffer.
+	void *mz_zip_reader_extract_to_heap( mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags );
+	void *mz_zip_reader_extract_file_to_heap( mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags );
+
+	// Extracts an archive file using a callback function to output the file's data.
+	mz_bool mz_zip_reader_extract_to_callback( mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags );
+	mz_bool mz_zip_reader_extract_file_to_callback( mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags );
+
+#ifndef MINIZ_NO_STDIO
+	// Extracts an archive file to a disk file and sets its last accessed and modified times.
+	// This function only extracts files, not archive directory records.
+	mz_bool mz_zip_reader_extract_to_file( mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags );
+	mz_bool mz_zip_reader_extract_file_to_file( mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags );
+#endif
+
+	// Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used.
+	mz_bool mz_zip_reader_end( mz_zip_archive *pZip );
+
+	// ZIP archive writing
+
+#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
+
+	// Inits a ZIP archive writer.
+	mz_bool mz_zip_writer_init( mz_zip_archive *pZip, mz_uint64 existing_size );
+	mz_bool mz_zip_writer_init_heap( mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size );
+
+#ifndef MINIZ_NO_STDIO
+	mz_bool mz_zip_writer_init_file( mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning );
+#endif
+
+	// Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive.
+	// For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called.
+	// For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it).
+	// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL.
+	// Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before
+	// the archive is finalized the file's central directory will be hosed.
+	mz_bool mz_zip_writer_init_from_reader( mz_zip_archive *pZip, const char *pFilename );
+
+	// Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive.
+	// To add a directory entry, call this method with an archive name ending in a forward slash and an empty buffer.
+	// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
+	mz_bool mz_zip_writer_add_mem( mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags );
+	mz_bool mz_zip_writer_add_mem_ex( mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32 );
+
+#ifndef MINIZ_NO_STDIO
+	// Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive.
+	// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
+	mz_bool mz_zip_writer_add_file( mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags );
+#endif
+
+	// Adds a file to an archive by fully cloning the data from another archive.
+	// This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields.
+	mz_bool mz_zip_writer_add_from_zip_reader( mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index );
+
+	// Finalizes the archive by writing the central directory records followed by the end of central directory record.
+	// After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end().
+	// An archive must be manually finalized by calling this function for it to be valid.
+	mz_bool mz_zip_writer_finalize_archive( mz_zip_archive *pZip );
+	mz_bool mz_zip_writer_finalize_heap_archive( mz_zip_archive *pZip, void **pBuf, size_t *pSize );
+
+	// Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used.
+	// Note for the archive to be valid, it must have been finalized before ending.
+	mz_bool mz_zip_writer_end( mz_zip_archive *pZip );
+
+	// Misc. high-level helper functions:
+
+	// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive.
+	// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
+	mz_bool mz_zip_add_mem_to_archive_file_in_place( const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags );
+
+	// Reads a single file from an archive into a heap block.
+	// Returns NULL on failure.
+	void *mz_zip_extract_archive_file_to_heap( const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags );
+
+#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
+
+#endif // #ifndef MINIZ_NO_ARCHIVE_APIS
+
+	// ------------------- Low-level Decompression API Definitions
+
+	// Decompression flags used by tinfl_decompress().
+	// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
+	// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
+	// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
+	// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes.
+	enum
+	{
+		TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
+		TINFL_FLAG_HAS_MORE_INPUT = 2,
+		TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
+		TINFL_FLAG_COMPUTE_ADLER32 = 8
+	};
+
+	// High level decompression functions:
+	// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc().
+	// On entry:
+	//  pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress.
+	// On return:
+	//  Function returns a pointer to the decompressed data, or NULL on failure.
+	//  *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data.
+	//  The caller must call mz_free() on the returned block when it's no longer needed.
+	void *tinfl_decompress_mem_to_heap( const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags );
+
+	// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory.
+	// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success.
+#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
+	size_t tinfl_decompress_mem_to_mem( void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags );
+
+	// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer.
+	// Returns 1 on success or 0 on failure.
+	typedef int( *tinfl_put_buf_func_ptr )( const void* pBuf, int len, void *pUser );
+	int tinfl_decompress_mem_to_callback( const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags );
+
+	struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor;
+
+	// Max size of LZ dictionary.
+#define TINFL_LZ_DICT_SIZE 32768
+
+	// Return status.
+	// Return status. Negative values are failures; DONE/NEEDS_MORE_INPUT/HAS_MORE_OUTPUT
+	// drive the streaming loop (see mz_inflate() below for typical handling).
+	typedef enum
+	{
+		TINFL_STATUS_BAD_PARAM = -3,				// caller passed invalid parameters
+		TINFL_STATUS_ADLER32_MISMATCH = -2,			// decompressed data failed the zlib adler32 check
+		TINFL_STATUS_FAILED = -1,					// stream is corrupt; decompression failed
+		TINFL_STATUS_DONE = 0,						// stream fully decompressed
+		TINFL_STATUS_NEEDS_MORE_INPUT = 1,			// more input bytes are required to continue
+		TINFL_STATUS_HAS_MORE_OUTPUT = 2			// output buffer full; drain it and call again
+	} tinfl_status;
+
+	// Initializes the decompressor to its initial state.
+#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END
+#define tinfl_get_adler32(r) (r)->m_check_adler32
+
+	// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability.
+	// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output.
+	tinfl_status tinfl_decompress( tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags );
+
+	// Internal/private bits follow.
+	enum
+	{
+		TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19,
+		TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
+	};
+
+	// Huffman decode table: per-symbol code lengths plus a direct-lookup table
+	// (TINFL_FAST_LOOKUP_BITS wide) with m_tree holding entries for longer codes.
+	typedef struct
+	{
+		mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];	// code length in bits, per symbol
+		mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];	// fast lookup + overflow tree
+	} tinfl_huff_table;
+
+#if MINIZ_HAS_64BIT_REGISTERS
+#define TINFL_USE_64BIT_BITBUF 1
+#endif
+
+#if TINFL_USE_64BIT_BITBUF
+	typedef mz_uint64 tinfl_bit_buf_t;
+#define TINFL_BITBUF_SIZE (64)
+#else
+	typedef mz_uint32 tinfl_bit_buf_t;
+#define TINFL_BITBUF_SIZE (32)
+#endif
+
+	// Complete low-level inflate state. tinfl_init() only zeroes m_state (see macro
+	// above), so the struct needs no separate allocation or teardown.
+	struct tinfl_decompressor_tag
+	{
+		mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES];
+		tinfl_bit_buf_t m_bit_buf;	// bit buffer; 32 or 64 bits wide depending on TINFL_USE_64BIT_BITBUF
+		size_t m_dist_from_out_buf_start;
+		tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];	// the three DEFLATE Huffman tables (literal/length, distance, code-length)
+		mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
+	};
+
+	// ------------------- Low-level Compression API Definitions
+
+	// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently).
+#define TDEFL_LESS_MEMORY 0
+
+	// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search):
+	// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression).
+	enum
+	{
+		TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF
+	};
+
+	// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data.
+	// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers).
+	// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing.
+	// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
+	// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1)
+	// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled.
+	// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables.
+	// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks.
+	// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK).
+	enum
+	{
+		TDEFL_WRITE_ZLIB_HEADER = 0x01000,
+		TDEFL_COMPUTE_ADLER32 = 0x02000,
+		TDEFL_GREEDY_PARSING_FLAG = 0x04000,
+		TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
+		TDEFL_RLE_MATCHES = 0x10000,
+		TDEFL_FILTER_MATCHES = 0x20000,
+		TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000,
+		TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000
+	};
+
+	// High level compression functions:
+	// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc().
+	// On entry:
+	//  pSrc_buf, src_buf_len: Pointer and size of source block to compress.
+	//  flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression.
+	// On return:
+	//  Function returns a pointer to the compressed data, or NULL on failure.
+	//  *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data.
+	//  The caller must free() the returned block when it's no longer needed.
+	void *tdefl_compress_mem_to_heap( const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags );
+
+	// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory.
+	// Returns 0 on failure.
+	size_t tdefl_compress_mem_to_mem( void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags );
+
+	// Compresses an image to a compressed PNG file in memory.
+	// On entry:
+	//  pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. 
+	//  The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory.
+	//  level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
+	//  If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps).
+	// On return:
+	//  Function returns a pointer to the compressed data, or NULL on failure.
+	//  *pLen_out will be set to the size of the PNG image file.
+	//  The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed.
+	void *tdefl_write_image_to_png_file_in_memory_ex( const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip );
+	void *tdefl_write_image_to_png_file_in_memory( const void *pImage, int w, int h, int num_chans, size_t *pLen_out );
+
+	// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time.
+	typedef mz_bool( *tdefl_put_buf_func_ptr )( const void* pBuf, int len, void *pUser );
+
+	// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally.
+	mz_bool tdefl_compress_mem_to_output( const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags );
+
+	enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 };
+
+	// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes).
+#if TDEFL_LESS_MEMORY
+	enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = ( TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = ( TDEFL_LZ_HASH_BITS + 2 ) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
+#else
+	enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = ( TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = ( TDEFL_LZ_HASH_BITS + 2 ) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
+#endif
+
+	// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions.
+	typedef enum
+	{
+		TDEFL_STATUS_BAD_PARAM = -2,
+		TDEFL_STATUS_PUT_BUF_FAILED = -1,
+		TDEFL_STATUS_OKAY = 0,
+		TDEFL_STATUS_DONE = 1,
+	} tdefl_status;
+
+	// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
+	typedef enum
+	{
+		TDEFL_NO_FLUSH = 0,
+		TDEFL_SYNC_FLUSH = 2,
+		TDEFL_FULL_FLUSH = 3,
+		TDEFL_FINISH = 4
+	} tdefl_flush;
+
+	// tdefl's compression state structure.
+	// tdefl's compression state structure.
+	// Large (LZ dictionary, hash chains and output buffer are embedded), so it is
+	// typically heap-allocated (see mz_deflateInit2); tdefl_init() sets it up
+	// without any further allocation.
+	typedef struct
+	{
+		tdefl_put_buf_func_ptr m_pPut_buf_func;		// optional output callback (see tdefl_init)
+		void *m_pPut_buf_user;						// user pointer handed back to m_pPut_buf_func
+		mz_uint m_flags, m_max_probes[2];
+		int m_greedy_parsing;
+		mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
+		mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
+		mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer;
+		mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish;
+		tdefl_status m_prev_return_status;			// result of the most recent tdefl_compress() call
+		const void *m_pIn_buf;
+		void *m_pOut_buf;
+		size_t *m_pIn_buf_size, *m_pOut_buf_size;
+		tdefl_flush m_flush;
+		const mz_uint8 *m_pSrc;
+		size_t m_src_buf_left, m_out_buf_ofs;
+		mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];	// LZ dictionary plus max-match slop
+		mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
+		mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
+		mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
+		mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
+		mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
+		mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];		// hash table used for dictionary match finding
+		mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
+	} tdefl_compressor;
+
+	// Initializes the compressor.
+	// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory.
+	// pPut_buf_func: If non-NULL, compressed output will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression.
+	// If pPut_buf_func is NULL the user should always call the tdefl_compress() API.
+	// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.)
+	tdefl_status tdefl_init( tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags );
+
+	// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible.
+	tdefl_status tdefl_compress( tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush );
+
+	// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr.
+	// tdefl_compress_buffer() always consumes the entire input buffer.
+	tdefl_status tdefl_compress_buffer( tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush );
+
+	tdefl_status tdefl_get_prev_return_status( tdefl_compressor *d );
+	mz_uint32 tdefl_get_adler32( tdefl_compressor *d );
+
+	// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros.
+#ifndef MINIZ_NO_ZLIB_APIS
+	// Create tdefl_compress() flags given zlib-style compression parameters.
+	// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files)
+	// window_bits may be -15 (raw deflate) or 15 (zlib)
+	// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED
+	mz_uint tdefl_create_comp_flags_from_zip_params( int level, int window_bits, int strategy );
+#endif // #ifndef MINIZ_NO_ZLIB_APIS
+
+#ifdef __cplusplus
+}
+#endif
+
+
+// ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.)
+
+// Compile-time size checks: the array extent evaluates to -1 (a compile error)
+// if the corresponding mz_uintXX typedef does not have the expected width.
+typedef unsigned char mz_validate_uint16[sizeof( mz_uint16 ) == 2 ? 1 : -1];
+typedef unsigned char mz_validate_uint32[sizeof( mz_uint32 ) == 4 ? 1 : -1];
+typedef unsigned char mz_validate_uint64[sizeof( mz_uint64 ) == 8 ? 1 : -1];
+
+#include <string.h>
+#include <assert.h>
+
+#define MZ_ASSERT(x) assert(x)
+
+#ifdef MINIZ_NO_MALLOC
+#define MZ_MALLOC(x) NULL
+#define MZ_FREE(x) (void)x, ((void)0)
+#define MZ_REALLOC(p, x) NULL
+#else
+#define MZ_MALLOC(x) nvmalloc(x)
+#define MZ_FREE(x) nvfree(x)
+#define MZ_REALLOC(p, x) nvrealloc(p, x)
+#endif
+
+#define MZ_MAX(a,b) (((a)>(b))?(a):(b))
+#define MZ_MIN(a,b) (((a)<(b))?(a):(b))
+#define MZ_CLEAR_OBJ(obj) nvmemset(&(obj), 0, sizeof(obj))
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+#define MZ_READ_LE16(p) *((const mz_uint16 *)(p))
+#define MZ_READ_LE32(p) *((const mz_uint32 *)(p))
+#else
+#define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U))
+#define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U))
+#endif
+
+#ifdef _MSC_VER
+#define MZ_FORCEINLINE __forceinline
+#elif defined(__GNUC__)
+#define MZ_FORCEINLINE inline __attribute__((__always_inline__))
+#else
+#define MZ_FORCEINLINE inline
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	// ------------------- zlib-style API's
+
+	// Computes/updates an Adler-32 checksum over buf_len bytes at ptr.
+	// Pass MZ_ADLER32_INIT as 'adler' (or a NULL ptr to retrieve it) to start a new checksum.
+	mz_ulong mz_adler32( mz_ulong adler, const unsigned char *ptr, size_t buf_len )
+	{
+		mz_uint32 i, s1 = (mz_uint32)( adler & 0xffff ), s2 = (mz_uint32)( adler >> 16 ); size_t block_len = buf_len % 5552;	// 5552 = zlib's NMAX: max bytes before s2 could overflow 32 bits
+		if ( !ptr ) return MZ_ADLER32_INIT;
+		while ( buf_len )
+		{
+			for ( i = 0; i + 7 < block_len; i += 8, ptr += 8 )
+			{	// inner loop unrolled x8
+				s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1;
+				s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1;
+			}
+			for ( ; i < block_len; ++i ) s1 += *ptr++, s2 += s1;
+			s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552;	// defer the expensive modulo until overflow is impossible
+		}
+		return ( s2 << 16 ) + s1;
+	}
+
+	// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/
+	// Processes one nibble at a time through a 16-entry table (cache-friendly, slower
+	// than a 256-entry table). Pass MZ_CRC32_INIT (or a NULL ptr to retrieve it) to
+	// start a new checksum.
+	mz_ulong mz_crc32( mz_ulong crc, const mz_uint8 *ptr, size_t buf_len )
+	{
+		static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
+			0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c };
+		mz_uint32 crcu32 = (mz_uint32)crc;
+		if ( !ptr ) return MZ_CRC32_INIT;
+		crcu32 = ~crcu32; while ( buf_len-- ) { mz_uint8 b = *ptr++; crcu32 = ( crcu32 >> 4 ) ^ s_crc32[( crcu32 & 0xF ) ^ ( b & 0xF )]; crcu32 = ( crcu32 >> 4 ) ^ s_crc32[( crcu32 & 0xF ) ^ ( b >> 4 )]; }	// low nibble then high nibble
+		return ~crcu32;
+	}
+
+	// Frees a heap block returned by miniz's high-level helpers (routes through
+	// MZ_FREE, i.e. nvfree in this build unless MINIZ_NO_MALLOC is defined).
+	void mz_free( void *p )
+	{
+		MZ_FREE( p );
+	}
+
+#ifndef MINIZ_NO_ZLIB_APIS
+
+	// Default zlib-style allocator callbacks, installed when the caller leaves
+	// stream.zalloc/zfree NULL; they forward to MZ_MALLOC/MZ_FREE/MZ_REALLOC.
+	static void *def_alloc_func( void *opaque, size_t items, size_t size ) { (void)opaque, (void)items, (void)size; return MZ_MALLOC( items * size ); }
+	static void def_free_func( void *opaque, void *address ) { (void)opaque, (void)address; MZ_FREE( address ); }
+	static void *def_realloc_func( void *opaque, void *address, size_t items, size_t size ) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC( address, items * size ); }
+
+	// Returns the miniz version string (the MZ_VERSION macro).
+	const char *mz_version( void )
+	{
+		return MZ_VERSION;
+	}
+
+	// zlib-style deflateInit(): MZ_DEFLATED method, default window bits, mem_level 9,
+	// default strategy. See mz_deflateInit2() for the full parameter set.
+	int mz_deflateInit( mz_streamp pStream, int level )
+	{
+		return mz_deflateInit2( pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY );
+	}
+
+	// zlib-style deflateInit2(). Allocates a tdefl_compressor via the stream's zalloc
+	// callback and stores it as the stream state. mem_level is validated (1..9) but
+	// otherwise unused here. Returns MZ_OK, or MZ_STREAM_ERROR / MZ_PARAM_ERROR /
+	// MZ_MEM_ERROR on failure.
+	int mz_deflateInit2( mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy )
+	{
+		tdefl_compressor *pComp;
+		mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params( level, window_bits, strategy );
+
+		if ( !pStream ) return MZ_STREAM_ERROR;
+		if ( ( method != MZ_DEFLATED ) || ( ( mem_level < 1 ) || ( mem_level > 9 ) ) || ( ( window_bits != MZ_DEFAULT_WINDOW_BITS ) && ( -window_bits != MZ_DEFAULT_WINDOW_BITS ) ) ) return MZ_PARAM_ERROR;	// negative window_bits selects raw deflate (no zlib header)
+
+		pStream->data_type = 0;
+		pStream->adler = MZ_ADLER32_INIT;
+		pStream->msg = NULL;
+		pStream->reserved = 0;
+		pStream->total_in = 0;
+		pStream->total_out = 0;
+		if ( !pStream->zalloc ) pStream->zalloc = def_alloc_func;	// fall back to the default allocator callbacks
+		if ( !pStream->zfree ) pStream->zfree = def_free_func;
+
+		pComp = (tdefl_compressor *)pStream->zalloc( pStream->opaque, 1, sizeof( tdefl_compressor ) );
+		if ( !pComp )
+			return MZ_MEM_ERROR;
+
+		pStream->state = ( struct mz_internal_state * )pComp;
+
+		if ( tdefl_init( pComp, NULL, NULL, comp_flags ) != TDEFL_STATUS_OKAY )
+		{
+			mz_deflateEnd( pStream );	// releases pComp through zfree
+			return MZ_PARAM_ERROR;
+		}
+
+		return MZ_OK;
+	}
+
+	// zlib-style deflateReset(): reinitializes the compressor for a fresh stream
+	// while keeping the compression flags chosen at init time.
+	int mz_deflateReset( mz_streamp pStream )
+	{
+		if ( !pStream || !pStream->state || !pStream->zalloc || !pStream->zfree )
+			return MZ_STREAM_ERROR;
+
+		tdefl_compressor *pComp = (tdefl_compressor*)pStream->state;
+		pStream->total_in = 0;
+		pStream->total_out = 0;
+		tdefl_init( pComp, NULL, NULL, pComp->m_flags );
+		return MZ_OK;
+	}
+
+	// zlib-style deflate(). Compresses from next_in/avail_in into next_out/avail_out,
+	// advancing the pointers, the total counters and pStream->adler as it goes. With
+	// flush == MZ_FINISH the stream is finalized; returns MZ_STREAM_END once all
+	// output has been produced, MZ_OK to continue, or an error code.
+	int mz_deflate( mz_streamp pStream, int flush )
+	{
+		size_t in_bytes, out_bytes;
+		mz_ulong orig_total_in, orig_total_out;
+		int mz_status = MZ_OK;
+
+		if ( ( !pStream ) || ( !pStream->state ) || ( flush < 0 ) || ( flush > MZ_FINISH ) || ( !pStream->next_out ) ) return MZ_STREAM_ERROR;
+		if ( !pStream->avail_out ) return MZ_BUF_ERROR;
+
+		if ( flush == MZ_PARTIAL_FLUSH ) flush = MZ_SYNC_FLUSH;	// partial flush is not supported; treat as sync flush
+
+		if ( ( (tdefl_compressor*)pStream->state )->m_prev_return_status == TDEFL_STATUS_DONE )
+			return ( flush == MZ_FINISH ) ? MZ_STREAM_END : MZ_BUF_ERROR;	// stream already finished
+
+		orig_total_in = pStream->total_in; orig_total_out = pStream->total_out;
+		for ( ; ; )
+		{
+			tdefl_status defl_status;
+			in_bytes = pStream->avail_in; out_bytes = pStream->avail_out;
+
+			// tdefl_compress() reports bytes consumed/produced back through in_bytes/out_bytes.
+			defl_status = tdefl_compress( (tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush );
+			pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes;
+			pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32( (tdefl_compressor*)pStream->state );
+
+			pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes;
+			pStream->total_out += (mz_uint)out_bytes;
+
+			if ( defl_status < 0 )	// negative tdefl status codes are failures
+			{
+				mz_status = MZ_STREAM_ERROR;
+				break;
+			}
+			else if ( defl_status == TDEFL_STATUS_DONE )
+			{
+				mz_status = MZ_STREAM_END;
+				break;
+			}
+			else if ( !pStream->avail_out )	// output buffer full; caller must drain and call again
+				break;
+			else if ( ( !pStream->avail_in ) && ( flush != MZ_FINISH ) )
+			{
+				if ( ( flush ) || ( pStream->total_in != orig_total_in ) || ( pStream->total_out != orig_total_out ) )
+					break;
+				return MZ_BUF_ERROR; // Can't make forward progress without some input.
+			}
+		}
+		return mz_status;
+	}
+
+	// zlib-style deflateEnd(): releases the compressor state allocated by
+	// mz_deflateInit2() through the stream's zfree callback. Calling it again on a
+	// stream whose state was already freed is a harmless no-op returning MZ_OK.
+	int mz_deflateEnd( mz_streamp pStream )
+	{
+		if ( !pStream )
+			return MZ_STREAM_ERROR;
+
+		struct mz_internal_state *pState = pStream->state;
+		if ( pState != NULL )
+		{
+			pStream->zfree( pStream->opaque, pState );
+			pStream->state = NULL;
+		}
+		return MZ_OK;
+	}
+
+	// Returns a (deliberately generous) upper bound on the compressed size of
+	// source_len input bytes. pStream is unused; the bound is independent of state.
+	mz_ulong mz_deflateBound( mz_streamp pStream, mz_ulong source_len )
+	{
+		(void)pStream;
+		// Over-conservative on purpose: a tight bound is tricky to compute given the
+		// way tdefl's block splitting works.
+		const mz_ulong expansion_bound = 128 + ( source_len * 110 ) / 100;
+		const mz_ulong stored_bound = 128 + source_len + ( ( source_len / ( 31 * 1024 ) ) + 1 ) * 5;
+		return MZ_MAX( expansion_bound, stored_bound );
+	}
+
+	// One-shot compression with zlib compress2() semantics: compresses source_len
+	// bytes from pSource into pDest at the given level. On entry *pDest_len is the
+	// capacity of pDest (use mz_compressBound() to size it); on success it is set
+	// to the compressed size. Returns MZ_OK or an MZ_* error code.
+	int mz_compress2( unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level )
+	{
+		mz_stream strm;
+		memset( &strm, 0, sizeof( strm ) );
+
+		// mz_ulong may be 64 bits wide, but the streaming API only takes 32-bit counts.
+		if ( ( source_len | *pDest_len ) > 0xFFFFFFFFU )
+			return MZ_PARAM_ERROR;
+
+		strm.next_in = pSource;
+		strm.avail_in = (mz_uint32)source_len;
+		strm.next_out = pDest;
+		strm.avail_out = (mz_uint32)*pDest_len;
+
+		int rc = mz_deflateInit( &strm, level );
+		if ( rc != MZ_OK )
+			return rc;
+
+		rc = mz_deflate( &strm, MZ_FINISH );
+		if ( rc != MZ_STREAM_END )
+		{
+			mz_deflateEnd( &strm );
+			// MZ_OK here means the output buffer was too small to finish the stream.
+			return ( rc == MZ_OK ) ? MZ_BUF_ERROR : rc;
+		}
+
+		*pDest_len = strm.total_out;
+		return mz_deflateEnd( &strm );
+	}
+
+	// One-shot compression at the default level; see mz_compress2() for details.
+	int mz_compress( unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len )
+	{
+		return mz_compress2( pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION );
+	}
+
+	// Returns the worst-case destination buffer size for compressing source_len bytes.
+	mz_ulong mz_compressBound( mz_ulong source_len )
+	{
+		return mz_deflateBound( NULL, source_len );
+	}
+
+	// Internal state backing the zlib-style inflate API: a tinfl decompressor plus a
+	// circular dictionary buffer used to stream output in TINFL_LZ_DICT_SIZE chunks.
+	typedef struct
+	{
+		tinfl_decompressor m_decomp;	// low-level decompressor state
+		mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits;	// m_window_bits > 0 => input has a zlib header
+		mz_uint8 m_dict[TINFL_LZ_DICT_SIZE];	// 32KB sliding window / output staging buffer
+		tinfl_status m_last_status;		// status of the most recent tinfl_decompress() call
+	} inflate_state;
+
+	// zlib-style inflateInit2(). Allocates an inflate_state via the stream's zalloc
+	// callback. window_bits must be +/-MZ_DEFAULT_WINDOW_BITS: positive means the
+	// input carries a zlib header (and adler32 footer), negative means raw deflate.
+	int mz_inflateInit2( mz_streamp pStream, int window_bits )
+	{
+		inflate_state *pDecomp;
+		if ( !pStream ) return MZ_STREAM_ERROR;
+		if ( ( window_bits != MZ_DEFAULT_WINDOW_BITS ) && ( -window_bits != MZ_DEFAULT_WINDOW_BITS ) ) return MZ_PARAM_ERROR;
+
+		pStream->data_type = 0;
+		pStream->adler = 0;
+		pStream->msg = NULL;
+		pStream->total_in = 0;
+		pStream->total_out = 0;
+		pStream->reserved = 0;
+		if ( !pStream->zalloc ) pStream->zalloc = def_alloc_func;	// fall back to the default allocator callbacks
+		if ( !pStream->zfree ) pStream->zfree = def_free_func;
+
+		pDecomp = (inflate_state*)pStream->zalloc( pStream->opaque, 1, sizeof( inflate_state ) );
+		if ( !pDecomp ) return MZ_MEM_ERROR;
+
+		pStream->state = ( struct mz_internal_state * )pDecomp;
+
+		tinfl_init( &pDecomp->m_decomp );	// just zeroes m_state
+		pDecomp->m_dict_ofs = 0;
+		pDecomp->m_dict_avail = 0;
+		pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT;
+		pDecomp->m_first_call = 1;
+		pDecomp->m_has_flushed = 0;
+		pDecomp->m_window_bits = window_bits;
+
+		return MZ_OK;
+	}
+
+	// zlib-style inflateInit(): expects zlib-wrapped input (MZ_DEFAULT_WINDOW_BITS).
+	int mz_inflateInit( mz_streamp pStream )
+	{
+		return mz_inflateInit2( pStream, MZ_DEFAULT_WINDOW_BITS );
+	}
+
+	int mz_inflate( mz_streamp pStream, int flush )
+	{
+		inflate_state* pState;
+		mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32;
+		size_t in_bytes, out_bytes, orig_avail_in;
+		tinfl_status status;
+
+		if ( ( !pStream ) || ( !pStream->state ) ) return MZ_STREAM_ERROR;
+		if ( flush == MZ_PARTIAL_FLUSH ) flush = MZ_SYNC_FLUSH;
+		if ( ( flush ) && ( flush != MZ_SYNC_FLUSH ) && ( flush != MZ_FINISH ) ) return MZ_STREAM_ERROR;
+
+		pState = (inflate_state*)pStream->state;
+		if ( pState->m_window_bits > 0 ) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER;
+		orig_avail_in = pStream->avail_in;
+
+		first_call = pState->m_first_call; pState->m_first_call = 0;
+		if ( pState->m_last_status < 0 ) return MZ_DATA_ERROR;
+
+		if ( pState->m_has_flushed && ( flush != MZ_FINISH ) ) return MZ_STREAM_ERROR;
+		pState->m_has_flushed |= ( flush == MZ_FINISH );
+
+		if ( ( flush == MZ_FINISH ) && ( first_call ) )
+		{
+			// MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file.
+			decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
+			in_bytes = pStream->avail_in; out_bytes = pStream->avail_out;
+			status = tinfl_decompress( &pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags );
+			pState->m_last_status = status;
+			pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes;
+			pStream->adler = tinfl_get_adler32( &pState->m_decomp );
+			pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes;
+
+			if ( status < 0 )
+				return MZ_DATA_ERROR;
+			else if ( status != TINFL_STATUS_DONE )
+			{
+				pState->m_last_status = TINFL_STATUS_FAILED;
+				return MZ_BUF_ERROR;
+			}
+			return MZ_STREAM_END;
+		}
+		// flush != MZ_FINISH then we must assume there's more input.
+		if ( flush != MZ_FINISH ) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT;
+
+		if ( pState->m_dict_avail )
+		{
+			n = MZ_MIN( pState->m_dict_avail, pStream->avail_out );
+			memcpy( pStream->next_out, pState->m_dict + pState->m_dict_ofs, n );
+			pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n;
+			pState->m_dict_avail -= n; pState->m_dict_ofs = ( pState->m_dict_ofs + n ) & ( TINFL_LZ_DICT_SIZE - 1 );
+			return ( ( pState->m_last_status == TINFL_STATUS_DONE ) && ( !pState->m_dict_avail ) ) ? MZ_STREAM_END : MZ_OK;
+		}
+
+		for ( ; ; )
+		{
+			in_bytes = pStream->avail_in;
+			out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs;
+
+			status = tinfl_decompress( &pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags );
+			pState->m_last_status = status;
+
+			pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes;
+			pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32( &pState->m_decomp );
+
+			pState->m_dict_avail = (mz_uint)out_bytes;
+
+			n = MZ_MIN( pState->m_dict_avail, pStream->avail_out );
+			memcpy( pStream->next_out, pState->m_dict + pState->m_dict_ofs, n );
+			pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n;
+			pState->m_dict_avail -= n; pState->m_dict_ofs = ( pState->m_dict_ofs + n ) & ( TINFL_LZ_DICT_SIZE - 1 );
+
+			if ( status < 0 )
+				return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well).
+			else if ( ( status == TINFL_STATUS_NEEDS_MORE_INPUT ) && ( !orig_avail_in ) )
+				return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH.
+			else if ( flush == MZ_FINISH )
+			{
+				// The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH.
+				if ( status == TINFL_STATUS_DONE )
+					return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END;
+				// status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong.
+				else if ( !pStream->avail_out )
+					return MZ_BUF_ERROR;
+			}
+			else if ( ( status == TINFL_STATUS_DONE ) || ( !pStream->avail_in ) || ( !pStream->avail_out ) || ( pState->m_dict_avail ) )
+				break;
+		}
+
+		return ( ( status == TINFL_STATUS_DONE ) && ( !pState->m_dict_avail ) ) ? MZ_STREAM_END : MZ_OK;
+	}
+
+	// Releases the inflate state attached to a stream. Returns MZ_STREAM_ERROR
+	// if pStream is NULL, otherwise MZ_OK. Safe to call more than once: the
+	// state pointer is reset to NULL after being freed.
+	int mz_inflateEnd( mz_streamp pStream )
+	{
+		if ( !pStream )
+			return MZ_STREAM_ERROR;
+		if ( pStream->state )
+		{
+			// Free through the stream's user-supplied allocator callback.
+			pStream->zfree( pStream->opaque, pStream->state );
+			pStream->state = NULL;
+		}
+		return MZ_OK;
+	}
+
+	// One-shot zlib-stream decompression helper. Inflates source_len bytes
+	// from pSource into pDest. On entry *pDest_len is the capacity of pDest;
+	// on success it is updated to the number of bytes actually written.
+	// Returns MZ_OK on success, otherwise a negative MZ_* error code.
+	int mz_uncompress( unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len )
+	{
+		mz_stream stream;
+		int status;
+		memset( &stream, 0, sizeof( stream ) );
+
+		// In case mz_ulong is 64-bits (argh I hate longs).
+		if ( ( source_len | *pDest_len ) > 0xFFFFFFFFU ) return MZ_PARAM_ERROR;
+
+		stream.next_in = pSource;
+		stream.avail_in = (mz_uint32)source_len;
+		stream.next_out = pDest;
+		stream.avail_out = (mz_uint32)*pDest_len;
+
+		status = mz_inflateInit( &stream );
+		if ( status != MZ_OK )
+			return status;
+
+		status = mz_inflate( &stream, MZ_FINISH );
+		if ( status != MZ_STREAM_END )
+		{
+			mz_inflateEnd( &stream );
+			// If all input was consumed yet we still got MZ_BUF_ERROR, the
+			// stream itself must be truncated/corrupt: report MZ_DATA_ERROR
+			// instead of the generic buffer error.
+			return ( ( status == MZ_BUF_ERROR ) && ( !stream.avail_in ) ) ? MZ_DATA_ERROR : status;
+		}
+		*pDest_len = stream.total_out;
+
+		return mz_inflateEnd( &stream );
+	}
+
+	// Maps an MZ_* status code to a short human-readable description.
+	// Returns "" for MZ_OK and NULL for codes not in the table.
+	const char *mz_error( int err )
+	{
+		static struct { int m_err; const char *m_pDesc; } s_error_descs[] =
+		{
+			{ MZ_OK, "" },{ MZ_STREAM_END, "stream end" },{ MZ_NEED_DICT, "need dictionary" },{ MZ_ERRNO, "file error" },{ MZ_STREAM_ERROR, "stream error" },
+			{ MZ_DATA_ERROR, "data error" },{ MZ_MEM_ERROR, "out of memory" },{ MZ_BUF_ERROR, "buf error" },{ MZ_VERSION_ERROR, "version error" },{ MZ_PARAM_ERROR, "parameter error" }
+		};
+		mz_uint i; for ( i = 0; i < sizeof( s_error_descs ) / sizeof( s_error_descs[0] ); ++i ) if ( s_error_descs[i].m_err == err ) return s_error_descs[i].m_pDesc;
+		return NULL;
+	}
+
+#endif //MINIZ_NO_ZLIB_APIS
+
+	// ------------------- Low-level Decompression (completely independent from all compression API's)
+
+#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
+#define TINFL_MEMSET(p, c, l) memset(p, c, l)
+
+// The inflator is structured as a "coroutine": r->m_state stores the resume
+// point, TINFL_CR_RETURN saves it and jumps out through common_exit, and the
+// surrounding switch re-enters at the matching case label on the next call.
+// TINFL_CR_RETURN_FOREVER makes the decompressor return the same status on
+// every subsequent call (used for unrecoverable failures and final DONE).
+#define TINFL_CR_BEGIN switch(r->m_state) { case 0:
+#define TINFL_CR_RETURN(state_index, result) do { status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END
+#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END
+#define TINFL_CR_FINISH }
+
+	// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never
+	// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario.
+#define TINFL_GET_BYTE(state_index, c) do { \
+  if (pIn_buf_cur >= pIn_buf_end) { \
+    for ( ; ; ) { \
+      if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \
+        TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \
+        if (pIn_buf_cur < pIn_buf_end) { \
+          c = *pIn_buf_cur++; \
+          break; \
+        } \
+      } else { \
+        c = 0; \
+        break; \
+      } \
+    } \
+  } else c = *pIn_buf_cur++; } MZ_MACRO_END
+
+// Bit-buffer helpers: whole input bytes are appended to bit_buf at bit
+// position num_bits; bits are always consumed from the low end of bit_buf.
+#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n))
+#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END
+#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END
+
+	// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2.
+	// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a
+	// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the
+	// bit buffer contains >=15 bits (deflate's max. Huffman code size).
+#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \
+  do { \
+    temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \
+    if (temp >= 0) { \
+      code_len = temp >> 9; \
+      if ((code_len) && (num_bits >= code_len)) \
+      break; \
+    } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \
+       code_len = TINFL_FAST_LOOKUP_BITS; \
+       do { \
+          temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
+       } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \
+    } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \
+  } while (num_bits < 15);
+
+	// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read
+	// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully
+	// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32.
+	// The slow path is only executed at the very end of the input buffer.
+#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \
+  int temp; mz_uint code_len, c; \
+  if (num_bits < 15) { \
+    if ((pIn_buf_end - pIn_buf_cur) < 2) { \
+       TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \
+    } else { \
+       bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \
+    } \
+  } \
+  if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \
+    code_len = temp >> 9, temp &= 511; \
+  else { \
+    code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \
+  } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END
+
+	// Core streaming DEFLATE decompressor, written as a resumable state machine
+	// (see the TINFL_CR_* macros): it may return early with
+	// TINFL_STATUS_NEEDS_MORE_INPUT / TINFL_STATUS_HAS_MORE_OUTPUT and must
+	// then be called again to continue exactly where it left off. On entry
+	// *pIn_buf_size / *pOut_buf_size hold the available input/output sizes; on
+	// exit they are updated (at common_exit) to the number of bytes actually
+	// consumed / produced by this call. Unless
+	// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF is set, the output buffer is
+	// treated as a circular dictionary whose size must be a power of 2.
+	tinfl_status tinfl_decompress( tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags )
+	{
+		// Static DEFLATE tables: base values and extra-bit counts for the
+		// length and distance codes, plus the code-length-code ordering.
+		static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 };
+		static const int s_length_extra[31] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
+		static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0 };
+		static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
+		static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
+		static const int s_min_table_sizes[3] = { 257, 1, 4 };
+
+		tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf;
+		const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size;
+		mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size;
+		size_t out_buf_size_mask = ( decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF ) ? (size_t)-1 : ( ( pOut_buf_next - pOut_buf_start ) + *pOut_buf_size ) - 1, dist_from_out_buf_start;
+
+		// Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter).
+		if ( ( ( out_buf_size_mask + 1 ) & out_buf_size_mask ) || ( pOut_buf_next < pOut_buf_start ) ) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; }
+
+		// Restore the decode registers saved at common_exit on the previous call.
+		num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start;
+		TINFL_CR_BEGIN
+
+			bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1;
+		if ( decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER )
+		{
+			// Validate the 2-byte zlib header: checksum multiple of 31, no
+			// preset dictionary bit, compression method 8 (deflate), and a
+			// window size that fits the (wrapping) output buffer.
+			TINFL_GET_BYTE( 1, r->m_zhdr0 ); TINFL_GET_BYTE( 2, r->m_zhdr1 );
+			counter = ( ( ( r->m_zhdr0 * 256 + r->m_zhdr1 ) % 31 != 0 ) || ( r->m_zhdr1 & 32 ) || ( ( r->m_zhdr0 & 15 ) != 8 ) );
+			if ( !( decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF ) ) counter |= ( ( ( 1U << ( 8U + ( r->m_zhdr0 >> 4 ) ) ) > 32768U ) || ( ( out_buf_size_mask + 1 ) < (size_t)( 1U << ( 8U + ( r->m_zhdr0 >> 4 ) ) ) ) );
+			if ( counter ) { TINFL_CR_RETURN_FOREVER( 36, TINFL_STATUS_FAILED ); }
+		}
+
+		// Decode one deflate block per iteration until the final-block flag
+		// (low bit of the 3-bit block header, kept in r->m_final) is seen.
+		do
+		{
+			TINFL_GET_BITS( 3, r->m_final, 3 ); r->m_type = r->m_final >> 1;
+			if ( r->m_type == 0 )
+			{
+				// Stored (uncompressed) block: byte-align, read LEN/~LEN, then raw-copy LEN bytes.
+				TINFL_SKIP_BITS( 5, num_bits & 7 );
+				for ( counter = 0; counter < 4; ++counter ) { if ( num_bits ) TINFL_GET_BITS( 6, r->m_raw_header[counter], 8 ); else TINFL_GET_BYTE( 7, r->m_raw_header[counter] ); }
+				if ( ( counter = ( r->m_raw_header[0] | ( r->m_raw_header[1] << 8 ) ) ) != (mz_uint)( 0xFFFF ^ ( r->m_raw_header[2] | ( r->m_raw_header[3] << 8 ) ) ) ) { TINFL_CR_RETURN_FOREVER( 39, TINFL_STATUS_FAILED ); }
+				// First drain any bytes still sitting in the bit buffer...
+				while ( ( counter ) && ( num_bits ) )
+				{
+					TINFL_GET_BITS( 51, dist, 8 );
+					while ( pOut_buf_cur >= pOut_buf_end ) { TINFL_CR_RETURN( 52, TINFL_STATUS_HAS_MORE_OUTPUT ); }
+					*pOut_buf_cur++ = (mz_uint8)dist;
+					counter--;
+				}
+				// ...then bulk-copy the rest straight from the input buffer.
+				while ( counter )
+				{
+					size_t n; while ( pOut_buf_cur >= pOut_buf_end ) { TINFL_CR_RETURN( 9, TINFL_STATUS_HAS_MORE_OUTPUT ); }
+					while ( pIn_buf_cur >= pIn_buf_end )
+					{
+						if ( decomp_flags & TINFL_FLAG_HAS_MORE_INPUT )
+						{
+							TINFL_CR_RETURN( 38, TINFL_STATUS_NEEDS_MORE_INPUT );
+						}
+						else
+						{
+							TINFL_CR_RETURN_FOREVER( 40, TINFL_STATUS_FAILED );
+						}
+					}
+					n = MZ_MIN( MZ_MIN( (size_t)( pOut_buf_end - pOut_buf_cur ), (size_t)( pIn_buf_end - pIn_buf_cur ) ), counter );
+					TINFL_MEMCPY( pOut_buf_cur, pIn_buf_cur, n ); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n;
+				}
+			}
+			else if ( r->m_type == 3 )
+			{
+				// Block type 3 is reserved/invalid in deflate.
+				TINFL_CR_RETURN_FOREVER( 10, TINFL_STATUS_FAILED );
+			}
+			else
+			{
+				if ( r->m_type == 1 )
+				{
+					// Fixed-Huffman block: install the predefined code lengths.
+					mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i;
+					r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET( r->m_tables[1].m_code_size, 5, 32 );
+					for ( i = 0; i <= 143; ++i ) *p++ = 8; for ( ; i <= 255; ++i ) *p++ = 9; for ( ; i <= 279; ++i ) *p++ = 7; for ( ; i <= 287; ++i ) *p++ = 8;
+				}
+				else
+				{
+					// Dynamic-Huffman block: read table sizes and the code-length code lengths (in dezigzag order).
+					for ( counter = 0; counter < 3; counter++ ) { TINFL_GET_BITS( 11, r->m_table_sizes[counter], "\05\05\04"[counter] ); r->m_table_sizes[counter] += s_min_table_sizes[counter]; }
+					MZ_CLEAR_OBJ( r->m_tables[2].m_code_size ); for ( counter = 0; counter < r->m_table_sizes[2]; counter++ ) { mz_uint s; TINFL_GET_BITS( 14, s, 3 ); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; }
+					r->m_table_sizes[2] = 19;
+				}
+				// Build the decode structures for each needed table (a direct
+				// fast look-up array for short codes, plus a binary tree in
+				// m_tree for codes longer than TINFL_FAST_LOOKUP_BITS).
+				for ( ; (int)r->m_type >= 0; r->m_type-- )
+				{
+					int tree_next, tree_cur; tinfl_huff_table *pTable;
+					mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ( total_syms ); MZ_CLEAR_OBJ( pTable->m_look_up ); MZ_CLEAR_OBJ( pTable->m_tree );
+					for ( i = 0; i < r->m_table_sizes[r->m_type]; ++i ) total_syms[pTable->m_code_size[i]]++;
+					used_syms = 0, total = 0; next_code[0] = next_code[1] = 0;
+					for ( i = 1; i <= 15; ++i ) { used_syms += total_syms[i]; next_code[i + 1] = ( total = ( ( total + total_syms[i] ) << 1 ) ); }
+					// A valid (complete) Huffman code must consume exactly the full code space.
+					if ( ( 65536 != total ) && ( used_syms > 1 ) )
+					{
+						TINFL_CR_RETURN_FOREVER( 35, TINFL_STATUS_FAILED );
+					}
+					for ( tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index )
+					{
+						mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if ( !code_size ) continue;
+						cur_code = next_code[code_size]++; for ( l = code_size; l > 0; l--, cur_code >>= 1 ) rev_code = ( rev_code << 1 ) | ( cur_code & 1 );
+						if ( code_size <= TINFL_FAST_LOOKUP_BITS ) { mz_int16 k = (mz_int16)( ( code_size << 9 ) | sym_index ); while ( rev_code < TINFL_FAST_LOOKUP_SIZE ) { pTable->m_look_up[rev_code] = k; rev_code += ( 1 << code_size ); } continue; }
+						if ( 0 == ( tree_cur = pTable->m_look_up[rev_code & ( TINFL_FAST_LOOKUP_SIZE - 1 )] ) ) { pTable->m_look_up[rev_code & ( TINFL_FAST_LOOKUP_SIZE - 1 )] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; }
+						rev_code >>= ( TINFL_FAST_LOOKUP_BITS - 1 );
+						for ( j = code_size; j > ( TINFL_FAST_LOOKUP_BITS + 1 ); j-- )
+						{
+							tree_cur -= ( ( rev_code >>= 1 ) & 1 );
+							if ( !pTable->m_tree[-tree_cur - 1] ) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; }
+							else tree_cur = pTable->m_tree[-tree_cur - 1];
+						}
+						tree_cur -= ( ( rev_code >>= 1 ) & 1 ); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index;
+					}
+					if ( r->m_type == 2 )
+					{
+						// Table 2 (the code-length code) just got built: use it
+						// to decode the literal/length and distance code sizes,
+						// including the 16/17/18 repeat codes.
+						for ( counter = 0; counter < ( r->m_table_sizes[0] + r->m_table_sizes[1] ); )
+						{
+							mz_uint s; TINFL_HUFF_DECODE( 16, dist, &r->m_tables[2] ); if ( dist < 16 ) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; }
+							if ( ( dist == 16 ) && ( !counter ) )
+							{
+								TINFL_CR_RETURN_FOREVER( 17, TINFL_STATUS_FAILED );
+							}
+							num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS( 18, s, num_extra ); s += "\03\03\013"[dist - 16];
+							TINFL_MEMSET( r->m_len_codes + counter, ( dist == 16 ) ? r->m_len_codes[counter - 1] : 0, s ); counter += s;
+						}
+						if ( ( r->m_table_sizes[0] + r->m_table_sizes[1] ) != counter )
+						{
+							TINFL_CR_RETURN_FOREVER( 21, TINFL_STATUS_FAILED );
+						}
+						TINFL_MEMCPY( r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0] ); TINFL_MEMCPY( r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1] );
+					}
+				}
+				// Symbol decode loop: literals are emitted directly; a
+				// length/distance pair copies `counter` bytes from `dist`
+				// bytes back in previously written output (possibly wrapping
+				// through out_buf_size_mask).
+				for ( ; ; )
+				{
+					mz_uint8 *pSrc;
+					for ( ; ; )
+					{
+						if ( ( ( pIn_buf_end - pIn_buf_cur ) < 4 ) || ( ( pOut_buf_end - pOut_buf_cur ) < 2 ) )
+						{
+							// Slow/careful path near the buffer ends.
+							TINFL_HUFF_DECODE( 23, counter, &r->m_tables[0] );
+							if ( counter >= 256 )
+								break;
+							while ( pOut_buf_cur >= pOut_buf_end ) { TINFL_CR_RETURN( 24, TINFL_STATUS_HAS_MORE_OUTPUT ); }
+							*pOut_buf_cur++ = (mz_uint8)counter;
+						}
+						else
+						{
+							// Fast path: enough input/output headroom to decode
+							// up to two literals per iteration without checks.
+							int sym2; mz_uint code_len;
+#if TINFL_USE_64BIT_BITBUF
+							if ( num_bits < 30 ) { bit_buf |= ( ( (tinfl_bit_buf_t)MZ_READ_LE32( pIn_buf_cur ) ) << num_bits ); pIn_buf_cur += 4; num_bits += 32; }
+#else
+							if ( num_bits < 15 ) { bit_buf |= ( ( (tinfl_bit_buf_t)MZ_READ_LE16( pIn_buf_cur ) ) << num_bits ); pIn_buf_cur += 2; num_bits += 16; }
+#endif
+							if ( ( sym2 = r->m_tables[0].m_look_up[bit_buf & ( TINFL_FAST_LOOKUP_SIZE - 1 )] ) >= 0 )
+								code_len = sym2 >> 9;
+							else
+							{
+								code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ( ( bit_buf >> code_len++ ) & 1 )]; } while ( sym2 < 0 );
+							}
+							counter = sym2; bit_buf >>= code_len; num_bits -= code_len;
+							if ( counter & 256 )
+								break;
+
+#if !TINFL_USE_64BIT_BITBUF
+							if ( num_bits < 15 ) { bit_buf |= ( ( (tinfl_bit_buf_t)MZ_READ_LE16( pIn_buf_cur ) ) << num_bits ); pIn_buf_cur += 2; num_bits += 16; }
+#endif
+							if ( ( sym2 = r->m_tables[0].m_look_up[bit_buf & ( TINFL_FAST_LOOKUP_SIZE - 1 )] ) >= 0 )
+								code_len = sym2 >> 9;
+							else
+							{
+								code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ( ( bit_buf >> code_len++ ) & 1 )]; } while ( sym2 < 0 );
+							}
+							bit_buf >>= code_len; num_bits -= code_len;
+
+							pOut_buf_cur[0] = (mz_uint8)counter;
+							if ( sym2 & 256 )
+							{
+								pOut_buf_cur++;
+								counter = sym2;
+								break;
+							}
+							pOut_buf_cur[1] = (mz_uint8)sym2;
+							pOut_buf_cur += 2;
+						}
+					}
+					// Symbol 256 is end-of-block; anything above is a length code.
+					if ( ( counter &= 511 ) == 256 ) break;
+
+					num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257];
+					if ( num_extra ) { mz_uint extra_bits; TINFL_GET_BITS( 25, extra_bits, num_extra ); counter += extra_bits; }
+
+					TINFL_HUFF_DECODE( 26, dist, &r->m_tables[1] );
+					num_extra = s_dist_extra[dist]; dist = s_dist_base[dist];
+					if ( num_extra ) { mz_uint extra_bits; TINFL_GET_BITS( 27, extra_bits, num_extra ); dist += extra_bits; }
+
+					dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start;
+					if ( ( dist > dist_from_out_buf_start ) && ( decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF ) )
+					{
+						// Distance reaches before the start of the (linear) output buffer: corrupt stream.
+						TINFL_CR_RETURN_FOREVER( 37, TINFL_STATUS_FAILED );
+					}
+
+					pSrc = pOut_buf_start + ( ( dist_from_out_buf_start - dist ) & out_buf_size_mask );
+
+					if ( ( MZ_MAX( pOut_buf_cur, pSrc ) + counter ) > pOut_buf_end )
+					{
+						// Copy would run past the output end: go byte-by-byte,
+						// yielding HAS_MORE_OUTPUT whenever the buffer fills.
+						while ( counter-- )
+						{
+							while ( pOut_buf_cur >= pOut_buf_end ) { TINFL_CR_RETURN( 53, TINFL_STATUS_HAS_MORE_OUTPUT ); }
+							*pOut_buf_cur++ = pOut_buf_start[( dist_from_out_buf_start++ - dist ) & out_buf_size_mask];
+						}
+						continue;
+					}
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+					else if ( ( counter >= 9 ) && ( counter <= dist ) )
+					{
+						// Non-overlapping match: copy 8 bytes at a time.
+						const mz_uint8 *pSrc_end = pSrc + ( counter & ~7 );
+						do
+						{
+							( (mz_uint32 *)pOut_buf_cur )[0] = ( (const mz_uint32 *)pSrc )[0];
+							( (mz_uint32 *)pOut_buf_cur )[1] = ( (const mz_uint32 *)pSrc )[1];
+							pOut_buf_cur += 8;
+						} while ( ( pSrc += 8 ) < pSrc_end );
+						if ( ( counter &= 7 ) < 3 )
+						{
+							if ( counter )
+							{
+								pOut_buf_cur[0] = pSrc[0];
+								if ( counter > 1 )
+									pOut_buf_cur[1] = pSrc[1];
+								pOut_buf_cur += counter;
+							}
+							continue;
+						}
+					}
+#endif
+					// Generic (possibly overlapping) match copy, 3 bytes per step.
+					do
+					{
+						pOut_buf_cur[0] = pSrc[0];
+						pOut_buf_cur[1] = pSrc[1];
+						pOut_buf_cur[2] = pSrc[2];
+						pOut_buf_cur += 3; pSrc += 3;
+					} while ( (int)( counter -= 3 ) > 2 );
+					if ( (int)counter > 0 )
+					{
+						pOut_buf_cur[0] = pSrc[0];
+						if ( (int)counter > 1 )
+							pOut_buf_cur[1] = pSrc[1];
+						pOut_buf_cur += counter;
+					}
+				}
+			}
+		} while ( !( r->m_final & 1 ) );
+		if ( decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER )
+		{
+			// Read the trailing big-endian Adler-32 from the zlib stream.
+			TINFL_SKIP_BITS( 32, num_bits & 7 ); for ( counter = 0; counter < 4; ++counter ) { mz_uint s; if ( num_bits ) TINFL_GET_BITS( 41, s, 8 ); else TINFL_GET_BYTE( 42, s ); r->m_z_adler32 = ( r->m_z_adler32 << 8 ) | s; }
+		}
+		TINFL_CR_RETURN_FOREVER( 34, TINFL_STATUS_DONE );
+		TINFL_CR_FINISH
+
+			common_exit :
+		// Persist the decode registers and report consumed/produced byte counts.
+		r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start;
+		*pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next;
+		if ( ( decomp_flags & ( TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32 ) ) && ( status >= 0 ) )
+		{
+			// Fold the bytes emitted this call into the running Adler-32
+			// (mod-65521 scheme, inner loop unrolled 8x), then compare it
+			// against the stream's stored checksum once decoding is DONE.
+			const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size;
+			mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552;
+			while ( buf_len )
+			{
+				for ( i = 0; i + 7 < block_len; i += 8, ptr += 8 )
+				{
+					s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1;
+					s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1;
+				}
+				for ( ; i < block_len; ++i ) s1 += *ptr++, s2 += s1;
+				s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552;
+			}
+			r->m_check_adler32 = ( s2 << 16 ) + s1; if ( ( status == TINFL_STATUS_DONE ) && ( decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER ) && ( r->m_check_adler32 != r->m_z_adler32 ) ) status = TINFL_STATUS_ADLER32_MISMATCH;
+		}
+		return status;
+	}
+
+	// Higher level helper functions.
+
+	// Decompresses an entire in-memory stream into a heap buffer allocated via
+	// MZ_REALLOC (capacity doubles from an initial 128 bytes as needed).
+	// On success returns the buffer and stores its size in *pOut_len; the
+	// caller owns the returned memory (free it with MZ_FREE / the matching
+	// miniz free API -- allocation macros are defined elsewhere in this file).
+	// Returns NULL (with *pOut_len = 0) on corrupt/truncated data or OOM.
+	void *tinfl_decompress_mem_to_heap( const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags )
+	{
+		tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0;
+		*pOut_len = 0;
+		tinfl_init( &decomp );
+		for ( ; ; )
+		{
+			size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity;
+			// The whole input is present, so HAS_MORE_INPUT is cleared and the
+			// non-wrapping output mode is forced (output is a plain buffer).
+			tinfl_status status = tinfl_decompress( &decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size,
+				( flags & ~TINFL_FLAG_HAS_MORE_INPUT ) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF );
+			if ( ( status < 0 ) || ( status == TINFL_STATUS_NEEDS_MORE_INPUT ) )
+			{
+				// NEEDS_MORE_INPUT here means the stream was truncated.
+				MZ_FREE( pBuf ); *pOut_len = 0; return NULL;
+			}
+			src_buf_ofs += src_buf_size;
+			*pOut_len += dst_buf_size;
+			if ( status == TINFL_STATUS_DONE ) break;
+			new_out_buf_capacity = out_buf_capacity * 2; if ( new_out_buf_capacity < 128 ) new_out_buf_capacity = 128;
+			pNew_buf = MZ_REALLOC( pBuf, new_out_buf_capacity );
+			if ( !pNew_buf )
+			{
+				MZ_FREE( pBuf ); *pOut_len = 0; return NULL;
+			}
+			pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity;
+		}
+		return pBuf;
+	}
+
+	// Decompresses an entire in-memory stream into a caller-provided buffer in
+	// a single call. Returns the number of bytes written on success, or
+	// TINFL_DECOMPRESS_MEM_TO_MEM_FAILED if decompression did not reach
+	// TINFL_STATUS_DONE (corrupt data, or pOut_buf too small).
+	size_t tinfl_decompress_mem_to_mem( void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags )
+	{
+		tinfl_decompressor decomp; tinfl_status status; tinfl_init( &decomp );
+		status = tinfl_decompress( &decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, ( flags & ~TINFL_FLAG_HAS_MORE_INPUT ) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF );
+		return ( status != TINFL_STATUS_DONE ) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len;
+	}
+
+	// Decompresses an in-memory stream, delivering output to pPut_buf_func in
+	// chunks via a heap-allocated TINFL_LZ_DICT_SIZE circular dictionary.
+	// On return *pIn_buf_size is updated to the number of input bytes consumed.
+	// Returns 1 on success, 0 on corrupt data or if the callback aborted by
+	// returning false. NOTE(review): if the dictionary malloc fails this
+	// returns TINFL_STATUS_FAILED (a negative tinfl_status) rather than 0 --
+	// callers should treat any value != 1 as failure.
+	int tinfl_decompress_mem_to_callback( const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags )
+	{
+		int result = 0;
+		tinfl_decompressor decomp;
+		mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC( TINFL_LZ_DICT_SIZE ); size_t in_buf_ofs = 0, dict_ofs = 0;
+		if ( !pDict )
+			return TINFL_STATUS_FAILED;
+		tinfl_init( &decomp );
+		for ( ; ; )
+		{
+			size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs;
+			// Wrapping output mode: the dictionary doubles as the LZ window.
+			tinfl_status status = tinfl_decompress( &decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size,
+				( flags & ~( TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF ) ) );
+			in_buf_ofs += in_buf_size;
+			// A false return from the callback aborts with result == 0.
+			if ( ( dst_buf_size ) && ( !( *pPut_buf_func )( pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user ) ) )
+				break;
+			if ( status != TINFL_STATUS_HAS_MORE_OUTPUT )
+			{
+				result = ( status == TINFL_STATUS_DONE );
+				break;
+			}
+			dict_ofs = ( dict_ofs + dst_buf_size ) & ( TINFL_LZ_DICT_SIZE - 1 );
+		}
+		MZ_FREE( pDict );
+		*pIn_buf_size = in_buf_ofs;
+		return result;
+	}
+
+	// ------------------- Low-level Compression (independent from all decompression API's)
+
+	// Purposely making these tables static for faster init and thread safety.
+	// s_tdefl_len_sym / s_tdefl_len_extra: deflate length symbol (257..285)
+	// and extra-bit count for each table index 0..255 (presumably indexed by
+	// match length minus the minimum match length -- confirm at the use site).
+	static const mz_uint16 s_tdefl_len_sym[256] = {
+		257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272,
+		273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276,
+		277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,
+		279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
+		281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,
+		282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,
+		283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,
+		284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 };
+
+	static const mz_uint8 s_tdefl_len_extra[256] = {
+		0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+		4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+		5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+		5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 };
+
+	// Distance symbol / extra-bit tables, split into a "small" table for
+	// distances representable in 9 bits and a "large" table indexed by the
+	// high bits of bigger distances (see the compressor's use sites).
+	static const mz_uint8 s_tdefl_small_dist_sym[512] = {
+		0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
+		11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,
+		13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,
+		14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,
+		14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+		15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16,
+		16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+		16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
+		16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+		17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+		17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
+		17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 };
+
+	static const mz_uint8 s_tdefl_small_dist_extra[512] = {
+		0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,
+		5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+		6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+		6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+		7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+		7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+		7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+		7,7,7,7,7,7,7,7 };
+
+	static const mz_uint8 s_tdefl_large_dist_sym[128] = {
+		0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26,
+		26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,
+		28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 };
+
+	static const mz_uint8 s_tdefl_large_dist_extra[128] = {
+		0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,
+		12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+		13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 };
+
+	// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values.
+	// Two byte-wide LSD passes, ping-ponging between pSyms0 and pSyms1; the
+	// high-byte pass is skipped when every key fits in 8 bits. The returned
+	// pointer is whichever of the two buffers holds the final ordering.
+	typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq;
+	static tdefl_sym_freq* tdefl_radix_sort_syms( mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1 )
+	{
+		// hist[0..255] counts low bytes, hist[256..511] counts high bytes.
+		mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ( hist );
+		for ( i = 0; i < num_syms; i++ ) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ( ( freq >> 8 ) & 0xFF )]++; }
+		// If all high bytes are zero, a single pass suffices.
+		while ( ( total_passes > 1 ) && ( num_syms == hist[( total_passes - 1 ) * 256] ) ) total_passes--;
+		for ( pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8 )
+		{
+			const mz_uint32* pHist = &hist[pass << 8];
+			mz_uint offsets[256], cur_ofs = 0;
+			// Prefix-sum the histogram into starting offsets, then scatter (stable).
+			for ( i = 0; i < 256; i++ ) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; }
+			for ( i = 0; i < num_syms; i++ ) pNew_syms[offsets[( pCur_syms[i].m_key >> pass_shift ) & 0xFF]++] = pCur_syms[i];
+			{ tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; }
+		}
+		return pCur_syms;
+	}
+
+	// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
+	// In-place Huffman code-length computation: on entry A[0..n-1].m_key holds
+	// symbol frequencies sorted ascending; on exit A[i].m_key is the optimal
+	// code length for the i-th symbol. The array is reused for tree parents,
+	// internal-node depths, and finally leaf depths — statement order matters.
+	static void tdefl_calculate_minimum_redundancy( tdefl_sym_freq *A, int n )
+	{
+		int root, leaf, next, avbl, used, dpth;
+		if ( n == 0 ) return; else if ( n == 1 ) { A[0].m_key = 1; return; }
+		// Phase 1: build the Huffman tree; A[next].m_key becomes the weight of
+		// internal node 'next', and merged entries are overwritten with their
+		// parent's index.
+		A[0].m_key += A[1].m_key; root = 0; leaf = 2;
+		for ( next = 1; next < n - 1; next++ )
+		{
+			if ( leaf >= n || A[root].m_key < A[leaf].m_key ) { A[next].m_key = A[root].m_key; A[root++].m_key = (mz_uint16)next; }
+			else A[next].m_key = A[leaf++].m_key;
+			if ( leaf >= n || ( root < next && A[root].m_key < A[leaf].m_key ) ) { A[next].m_key = (mz_uint16)( A[next].m_key + A[root].m_key ); A[root++].m_key = (mz_uint16)next; }
+			else A[next].m_key = (mz_uint16)( A[next].m_key + A[leaf++].m_key );
+		}
+		// Phase 2: convert parent links into internal-node depths.
+		A[n - 2].m_key = 0; for ( next = n - 3; next >= 0; next-- ) A[next].m_key = A[A[next].m_key].m_key + 1;
+		// Phase 3: assign leaf depths from internal-node depths.
+		avbl = 1; used = dpth = 0; root = n - 2; next = n - 1;
+		while ( avbl > 0 )
+		{
+			while ( root >= 0 && (int)A[root].m_key == dpth ) { used++; root--; }
+			while ( avbl > used ) { A[next--].m_key = (mz_uint16)( dpth ); avbl--; }
+			avbl = 2 * used; dpth++; used = 0;
+		}
+	}
+
+	// Limits canonical Huffman code table's max code size.
+	enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 };
+	// pNum_codes[i] is the number of codes of length i. Lengths beyond
+	// max_code_size are folded down to max_code_size, then codes are moved to
+	// shorter lengths until the Kraft sum equals exactly 2^max_code_size
+	// (i.e. the code is complete again).
+	static void tdefl_huffman_enforce_max_code_size( int *pNum_codes, int code_list_len, int max_code_size )
+	{
+		int i; mz_uint32 total = 0; if ( code_list_len <= 1 ) return;
+		for ( i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++ ) pNum_codes[max_code_size] += pNum_codes[i];
+		// Kraft sum scaled by 2^max_code_size.
+		for ( i = max_code_size; i > 0; i-- ) total += ( ( (mz_uint32)pNum_codes[i] ) << ( max_code_size - i ) );
+		while ( total != ( 1UL << max_code_size ) )
+		{
+			// Each iteration: shorten one max-length code and lengthen one
+			// shorter code by one bit, reducing the scaled Kraft sum by 1.
+			pNum_codes[max_code_size]--;
+			for ( i = max_code_size - 1; i > 0; i-- ) if ( pNum_codes[i] ) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; }
+			total--;
+		}
+	}
+
+	// Builds the canonical Huffman code for table 'table_num' (0 = lit/len,
+	// 1 = distance, 2 = code-length alphabet). When static_table is set the
+	// code sizes in m_huff_code_sizes are taken as given; otherwise optimal
+	// sizes (capped at code_size_limit) are derived from m_huff_count.
+	static void tdefl_optimize_huffman_table( tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table )
+	{
+		int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ( num_codes );
+		if ( static_table )
+		{
+			for ( i = 0; i < table_len; i++ ) num_codes[d->m_huff_code_sizes[table_num][i]]++;
+		}
+		else
+		{
+			tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms;
+			int num_used_syms = 0;
+			const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0];
+			// Collect only symbols with a non-zero frequency.
+			for ( i = 0; i < table_len; i++ ) if ( pSym_count[i] ) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; }
+
+			pSyms = tdefl_radix_sort_syms( num_used_syms, syms0, syms1 ); tdefl_calculate_minimum_redundancy( pSyms, num_used_syms );
+
+			// Histogram of code lengths, then clamp to code_size_limit.
+			for ( i = 0; i < num_used_syms; i++ ) num_codes[pSyms[i].m_key]++;
+
+			tdefl_huffman_enforce_max_code_size( num_codes, num_used_syms, code_size_limit );
+
+			// Assign lengths back to symbols: pSyms is sorted by frequency, so
+			// walking it from the end gives the shortest codes to the most
+			// frequent symbols.
+			MZ_CLEAR_OBJ( d->m_huff_code_sizes[table_num] ); MZ_CLEAR_OBJ( d->m_huff_codes[table_num] );
+			for ( i = 1, j = num_used_syms; i <= code_size_limit; i++ )
+				for ( l = num_codes[i]; l > 0; l-- ) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)( i );
+		}
+
+		// Canonical code assignment (RFC 1951 3.2.2): first code of each length.
+		next_code[1] = 0; for ( j = 0, i = 2; i <= code_size_limit; i++ ) next_code[i] = j = ( ( j + num_codes[i - 1] ) << 1 );
+
+		for ( i = 0; i < table_len; i++ )
+		{
+			mz_uint rev_code = 0, code, code_size; if ( ( code_size = d->m_huff_code_sizes[table_num][i] ) == 0 ) continue;
+			// Bit-reverse each code: DEFLATE Huffman codes are packed into the
+			// bitstream starting from their most-significant bit.
+			code = next_code[code_size]++; for ( l = code_size; l > 0; l--, code >>= 1 ) rev_code = ( rev_code << 1 ) | ( code & 1 );
+			d->m_huff_codes[table_num][i] = (mz_uint16)rev_code;
+		}
+	}
+
+// Appends the low 'l' bits of 'b' to the bit buffer of the enclosing scope's
+// compressor 'd', flushing complete bytes to d->m_pOutput_buf.
+// NOTE(review): the indentation below is misleading but the code is correct —
+// only the byte store is guarded by the bounds check; the shift and the
+// m_bits_in decrement always run. On overflow, bytes are silently dropped and
+// callers detect it afterwards by comparing m_pOutput_buf to m_pOutput_buf_end.
+#define TDEFL_PUT_BITS(b, l) do { \
+  mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \
+  d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \
+  while (d->m_bits_in >= 8) { \
+    if (d->m_pOutput_buf < d->m_pOutput_buf_end) \
+      *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \
+      d->m_bit_buffer >>= 8; \
+      d->m_bits_in -= 8; \
+  } \
+} MZ_MACRO_END
+
+// Flushes a pending run of 'rle_repeat_count' repetitions of prev_code_size
+// (locals of the enclosing scope): runs < 3 are emitted as literal code sizes;
+// longer runs become code-length symbol 16 followed by (count - 3).
+#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \
+  if (rle_repeat_count < 3) { \
+    d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \
+    while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \
+  } else { \
+    d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \
+} rle_repeat_count = 0; } }
+
+// Flushes a pending run of 'rle_z_count' zero code sizes: runs < 3 are emitted
+// as literal zeros; runs of 3..10 become symbol 17 + (count - 3); longer runs
+// (the caller caps them at 138) become symbol 18 + (count - 11).
+#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \
+  if (rle_z_count < 3) { \
+    d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \
+  } else if (rle_z_count <= 10) { \
+    d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \
+  } else { \
+    d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \
+} rle_z_count = 0; } }
+
+	// Transmission order of the code-length alphabet's code sizes in a dynamic
+	// block header (RFC 1951 3.2.7).
+	static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
+
+	// Emits the header of a dynamic-Huffman DEFLATE block: builds optimized
+	// lit/len and distance tables from the accumulated symbol counts, RLE-packs
+	// the concatenated code-length lists with symbols 16/17/18, Huffman-codes
+	// that packing with a third table, and writes it all to the bitstream.
+	static void tdefl_start_dynamic_block( tdefl_compressor *d )
+	{
+		int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index;
+		mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF;
+
+		// Force the end-of-block symbol (256) to have a code.
+		d->m_huff_count[0][256] = 1;
+
+		tdefl_optimize_huffman_table( d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE );
+		tdefl_optimize_huffman_table( d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE );
+
+		// Trim trailing zero-length codes (HLIT >= 257, HDIST >= 1).
+		for ( num_lit_codes = 286; num_lit_codes > 257; num_lit_codes-- ) if ( d->m_huff_code_sizes[0][num_lit_codes - 1] ) break;
+		for ( num_dist_codes = 30; num_dist_codes > 1; num_dist_codes-- ) if ( d->m_huff_code_sizes[1][num_dist_codes - 1] ) break;
+
+		memcpy( code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes );
+		memcpy( code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes );
+		total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0;
+
+		// RLE-pack the combined code-size list, counting symbol frequencies
+		// for the code-length table (table 2) as we go.
+		memset( &d->m_huff_count[2][0], 0, sizeof( d->m_huff_count[2][0] ) * TDEFL_MAX_HUFF_SYMBOLS_2 );
+		for ( i = 0; i < total_code_sizes_to_pack; i++ )
+		{
+			mz_uint8 code_size = code_sizes_to_pack[i];
+			if ( !code_size )
+			{
+				TDEFL_RLE_PREV_CODE_SIZE();
+				// 138 is the longest zero run symbol 18 can express.
+				if ( ++rle_z_count == 138 ) { TDEFL_RLE_ZERO_CODE_SIZE(); }
+			}
+			else
+			{
+				TDEFL_RLE_ZERO_CODE_SIZE();
+				if ( code_size != prev_code_size )
+				{
+					TDEFL_RLE_PREV_CODE_SIZE();
+					d->m_huff_count[2][code_size] = (mz_uint16)( d->m_huff_count[2][code_size] + 1 ); packed_code_sizes[num_packed_code_sizes++] = code_size;
+				}
+				else if ( ++rle_repeat_count == 6 )
+				{
+					// 6 is the longest repeat symbol 16 can express.
+					TDEFL_RLE_PREV_CODE_SIZE();
+				}
+			}
+			prev_code_size = code_size;
+		}
+		if ( rle_repeat_count ) { TDEFL_RLE_PREV_CODE_SIZE(); }
+		else { TDEFL_RLE_ZERO_CODE_SIZE(); }
+
+		tdefl_optimize_huffman_table( d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE );
+
+		// Block type 2 = dynamic Huffman.
+		TDEFL_PUT_BITS( 2, 2 );
+
+		TDEFL_PUT_BITS( num_lit_codes - 257, 5 );
+		TDEFL_PUT_BITS( num_dist_codes - 1, 5 );
+
+		// Count how many code-length code sizes must be sent (HCLEN), in
+		// swizzled order, trimming trailing zeros; minimum of 4 per the spec.
+		for ( num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths-- ) if ( d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]] ) break;
+		num_bit_lengths = MZ_MAX( 4, ( num_bit_lengths + 1 ) ); TDEFL_PUT_BITS( num_bit_lengths - 4, 4 );
+		for ( i = 0; (int)i < num_bit_lengths; i++ ) TDEFL_PUT_BITS( d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3 );
+
+		for ( packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; )
+		{
+			mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT( code < TDEFL_MAX_HUFF_SYMBOLS_2 );
+			TDEFL_PUT_BITS( d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code] );
+			// Symbols 16/17/18 carry an extra repeat count of 2/3/7 bits.
+			if ( code >= 16 ) TDEFL_PUT_BITS( packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16] );
+		}
+	}
+
+	// Emits the header of a static-Huffman DEFLATE block, installing the fixed
+	// code lengths from RFC 1951 3.2.6 (lit/len: 8/9/7/8 by range, all
+	// distances: 5) and building their canonical codes.
+	static void tdefl_start_static_block( tdefl_compressor *d )
+	{
+		mz_uint i;
+		mz_uint8 *p = &d->m_huff_code_sizes[0][0];
+
+		for ( i = 0; i <= 143; ++i ) *p++ = 8;
+		for ( ; i <= 255; ++i ) *p++ = 9;
+		for ( ; i <= 279; ++i ) *p++ = 7;
+		for ( ; i <= 287; ++i ) *p++ = 8;
+
+		memset( d->m_huff_code_sizes[1], 5, 32 );
+
+		tdefl_optimize_huffman_table( d, 0, 288, 15, MZ_TRUE );
+		tdefl_optimize_huffman_table( d, 1, 32, 15, MZ_TRUE );
+
+		// Block type 1 = static Huffman.
+		TDEFL_PUT_BITS( 1, 2 );
+	}
+
+	// mz_bitmasks[n] has the low n bits set (n = 0..16).
+	static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF };
+
+// Huffman-encodes the buffered LZ codes (flags byte, then per-flag either a
+// literal byte or a 3-byte match of len + 16-bit dist) and appends the
+// end-of-block code. Returns MZ_FALSE if the output buffer overflowed.
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS
+	// Fast variant: accumulates bits in a 64-bit register and flushes with
+	// unaligned 64-bit stores; requires little-endian and unaligned access.
+	static mz_bool tdefl_compress_lz_codes( tdefl_compressor *d )
+	{
+		mz_uint flags;
+		mz_uint8 *pLZ_codes;
+		mz_uint8 *pOutput_buf = d->m_pOutput_buf;
+		mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf;
+		mz_uint64 bit_buffer = d->m_bit_buffer;
+		mz_uint bits_in = d->m_bits_in;
+
+// Unchecked local put-bits: overflow is detected once per LZ code below.
+#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); }
+
+		flags = 1;
+		for ( pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1 )
+		{
+			// flags is reloaded every 8 codes; the 0x100 sentinel marks when
+			// all 8 flag bits have been consumed.
+			if ( flags == 1 )
+				flags = *pLZ_codes++ | 0x100;
+
+			if ( flags & 1 )
+			{
+				// Match: emit length symbol + extra bits, then distance
+				// symbol + extra bits.
+				mz_uint s0, s1, n0, n1, sym, num_extra_bits;
+				mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)( pLZ_codes + 1 ); pLZ_codes += 3;
+
+				MZ_ASSERT( d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]] );
+				TDEFL_PUT_BITS_FAST( d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]] );
+				TDEFL_PUT_BITS_FAST( match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len] );
+
+				// This sequence coaxes MSVC into using cmov's vs. jmp's.
+				s0 = s_tdefl_small_dist_sym[match_dist & 511];
+				n0 = s_tdefl_small_dist_extra[match_dist & 511];
+				s1 = s_tdefl_large_dist_sym[match_dist >> 8];
+				n1 = s_tdefl_large_dist_extra[match_dist >> 8];
+				sym = ( match_dist < 512 ) ? s0 : s1;
+				num_extra_bits = ( match_dist < 512 ) ? n0 : n1;
+
+				MZ_ASSERT( d->m_huff_code_sizes[1][sym] );
+				TDEFL_PUT_BITS_FAST( d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym] );
+				TDEFL_PUT_BITS_FAST( match_dist & mz_bitmasks[num_extra_bits], num_extra_bits );
+			}
+			else
+			{
+				// Literal — opportunistically handle up to 3 consecutive
+				// literals per iteration to amortize the flush below.
+				mz_uint lit = *pLZ_codes++;
+				MZ_ASSERT( d->m_huff_code_sizes[0][lit] );
+				TDEFL_PUT_BITS_FAST( d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit] );
+
+				if ( ( ( flags & 2 ) == 0 ) && ( pLZ_codes < pLZ_code_buf_end ) )
+				{
+					flags >>= 1;
+					lit = *pLZ_codes++;
+					MZ_ASSERT( d->m_huff_code_sizes[0][lit] );
+					TDEFL_PUT_BITS_FAST( d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit] );
+
+					if ( ( ( flags & 2 ) == 0 ) && ( pLZ_codes < pLZ_code_buf_end ) )
+					{
+						flags >>= 1;
+						lit = *pLZ_codes++;
+						MZ_ASSERT( d->m_huff_code_sizes[0][lit] );
+						TDEFL_PUT_BITS_FAST( d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit] );
+					}
+				}
+			}
+
+			if ( pOutput_buf >= d->m_pOutput_buf_end )
+				return MZ_FALSE;
+
+			// Flush whole bytes with a single unaligned 64-bit store.
+			*(mz_uint64*)pOutput_buf = bit_buffer;
+			pOutput_buf += ( bits_in >> 3 );
+			bit_buffer >>= ( bits_in & ~7 );
+			bits_in &= 7;
+		}
+
+#undef TDEFL_PUT_BITS_FAST
+
+		d->m_pOutput_buf = pOutput_buf;
+		d->m_bits_in = 0;
+		d->m_bit_buffer = 0;
+
+		// Drain the leftover (< 8) bits through the checked slow-path macro.
+		while ( bits_in )
+		{
+			mz_uint32 n = MZ_MIN( bits_in, 16 );
+			TDEFL_PUT_BITS( (mz_uint)bit_buffer & mz_bitmasks[n], n );
+			bit_buffer >>= n;
+			bits_in -= n;
+		}
+
+		// End-of-block symbol (256).
+		TDEFL_PUT_BITS( d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256] );
+
+		return ( d->m_pOutput_buf < d->m_pOutput_buf_end );
+	}
+#else
+	// Portable variant: byte-at-a-time via TDEFL_PUT_BITS; same output.
+	static mz_bool tdefl_compress_lz_codes( tdefl_compressor *d )
+	{
+		mz_uint flags;
+		mz_uint8 *pLZ_codes;
+
+		flags = 1;
+		for ( pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1 )
+		{
+			if ( flags == 1 )
+				flags = *pLZ_codes++ | 0x100;
+			if ( flags & 1 )
+			{
+				mz_uint sym, num_extra_bits;
+				// Distance is read as two bytes to avoid unaligned access.
+				mz_uint match_len = pLZ_codes[0], match_dist = ( pLZ_codes[1] | ( pLZ_codes[2] << 8 ) ); pLZ_codes += 3;
+
+				MZ_ASSERT( d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]] );
+				TDEFL_PUT_BITS( d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]] );
+				TDEFL_PUT_BITS( match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len] );
+
+				if ( match_dist < 512 )
+				{
+					sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist];
+				}
+				else
+				{
+					sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8];
+				}
+				MZ_ASSERT( d->m_huff_code_sizes[1][sym] );
+				TDEFL_PUT_BITS( d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym] );
+				TDEFL_PUT_BITS( match_dist & mz_bitmasks[num_extra_bits], num_extra_bits );
+			}
+			else
+			{
+				mz_uint lit = *pLZ_codes++;
+				MZ_ASSERT( d->m_huff_code_sizes[0][lit] );
+				TDEFL_PUT_BITS( d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit] );
+			}
+		}
+
+		TDEFL_PUT_BITS( d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256] );
+
+		return ( d->m_pOutput_buf < d->m_pOutput_buf_end );
+	}
+#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS
+
+	// Writes one DEFLATE block: emits the static or dynamic header, then the
+	// Huffman-coded LZ codes. Returns MZ_FALSE on output-buffer overflow.
+	static mz_bool tdefl_compress_block( tdefl_compressor *d, mz_bool static_block )
+	{
+		if ( static_block )
+			tdefl_start_static_block( d );
+		else
+			tdefl_start_dynamic_block( d );
+		return tdefl_compress_lz_codes( d );
+	}
+
+	// Flushes the accumulated LZ codes as one complete DEFLATE block, choosing
+	// between raw (stored), static, and dynamic encodings, then resets the LZ
+	// buffers and hands the bytes to the output callback or user buffer.
+	// Returns the number of bytes still pending (> 0 means the caller must
+	// drain m_output_flush_remaining), or a negative status on failure.
+	static int tdefl_flush_block( tdefl_compressor *d, int flush )
+	{
+		mz_uint saved_bit_buf, saved_bits_in;
+		mz_uint8 *pSaved_output_buf;
+		mz_bool comp_block_succeeded = MZ_FALSE;
+		int n, use_raw_block = ( ( d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS ) != 0 ) && ( d->m_lookahead_pos - d->m_lz_code_buf_dict_pos ) <= d->m_dict_size;
+		// Write directly into the user's buffer when it is big enough and no
+		// callback is installed; otherwise stage through m_output_buf.
+		mz_uint8 *pOutput_buf_start = ( ( d->m_pPut_buf_func == NULL ) && ( ( *d->m_pOut_buf_size - d->m_out_buf_ofs ) >= TDEFL_OUT_BUF_SIZE ) ) ? ( (mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs ) : d->m_output_buf;
+
+		d->m_pOutput_buf = pOutput_buf_start;
+		d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16;
+
+		MZ_ASSERT( !d->m_output_flush_remaining );
+		d->m_output_flush_ofs = 0;
+		d->m_output_flush_remaining = 0;
+
+		// Right-align the partial flags byte; drop it entirely if unused.
+		*d->m_pLZ_flags = (mz_uint8)( *d->m_pLZ_flags >> d->m_num_flags_left );
+		d->m_pLZ_code_buf -= ( d->m_num_flags_left == 8 );
+
+		// zlib stream header (0x78 0x01) before the very first block.
+		if ( ( d->m_flags & TDEFL_WRITE_ZLIB_HEADER ) && ( !d->m_block_index ) )
+		{
+			TDEFL_PUT_BITS( 0x78, 8 ); TDEFL_PUT_BITS( 0x01, 8 );
+		}
+
+		// BFINAL bit.
+		TDEFL_PUT_BITS( flush == TDEFL_FINISH, 1 );
+
+		// Save output state so we can roll back if the block expands.
+		pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in;
+
+		if ( !use_raw_block )
+			comp_block_succeeded = tdefl_compress_block( d, ( d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS ) || ( d->m_total_lz_bytes < 48 ) );
+
+		// If the block gets expanded, forget the current contents of the output buffer and send a raw block instead.
+		if ( ( ( use_raw_block ) || ( ( d->m_total_lz_bytes ) && ( ( d->m_pOutput_buf - pSaved_output_buf + 1U ) >= d->m_total_lz_bytes ) ) ) &&
+			( ( d->m_lookahead_pos - d->m_lz_code_buf_dict_pos ) <= d->m_dict_size ) )
+		{
+			mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
+			// Stored block: type 0, pad to byte, then LEN and ~LEN (the XOR
+			// loop emits the count followed by its complement).
+			TDEFL_PUT_BITS( 0, 2 );
+			if ( d->m_bits_in ) { TDEFL_PUT_BITS( 0, 8 - d->m_bits_in ); }
+			for ( i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF )
+			{
+				TDEFL_PUT_BITS( d->m_total_lz_bytes & 0xFFFF, 16 );
+			}
+			// Copy the uncompressed bytes straight out of the dictionary.
+			for ( i = 0; i < d->m_total_lz_bytes; ++i )
+			{
+				TDEFL_PUT_BITS( d->m_dict[( d->m_lz_code_buf_dict_pos + i ) & TDEFL_LZ_DICT_SIZE_MASK], 8 );
+			}
+		}
+		// Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes.
+		else if ( !comp_block_succeeded )
+		{
+			d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
+			tdefl_compress_block( d, MZ_TRUE );
+		}
+
+		if ( flush )
+		{
+			if ( flush == TDEFL_FINISH )
+			{
+				// Byte-align, then append the big-endian adler32 trailer for
+				// zlib streams.
+				if ( d->m_bits_in ) { TDEFL_PUT_BITS( 0, 8 - d->m_bits_in ); }
+				if ( d->m_flags & TDEFL_WRITE_ZLIB_HEADER ) { mz_uint i, a = d->m_adler32; for ( i = 0; i < 4; i++ ) { TDEFL_PUT_BITS( ( a >> 24 ) & 0xFF, 8 ); a <<= 8; } }
+			}
+			else
+			{
+				// Sync flush: empty stored block so the decoder can resume at
+				// a byte boundary.
+				mz_uint i, z = 0; TDEFL_PUT_BITS( 0, 3 ); if ( d->m_bits_in ) { TDEFL_PUT_BITS( 0, 8 - d->m_bits_in ); } for ( i = 2; i; --i, z ^= 0xFFFF ) { TDEFL_PUT_BITS( z & 0xFFFF, 16 ); }
+			}
+		}
+
+		MZ_ASSERT( d->m_pOutput_buf < d->m_pOutput_buf_end );
+
+		// Reset per-block state for the next block.
+		memset( &d->m_huff_count[0][0], 0, sizeof( d->m_huff_count[0][0] ) * TDEFL_MAX_HUFF_SYMBOLS_0 );
+		memset( &d->m_huff_count[1][0], 0, sizeof( d->m_huff_count[1][0] ) * TDEFL_MAX_HUFF_SYMBOLS_1 );
+
+		d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++;
+
+		// Deliver the produced bytes: callback, staged copy into the user
+		// buffer (possibly partial), or direct write already in place.
+		if ( ( n = (int)( d->m_pOutput_buf - pOutput_buf_start ) ) != 0 )
+		{
+			if ( d->m_pPut_buf_func )
+			{
+				*d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
+				if ( !( *d->m_pPut_buf_func )( d->m_output_buf, n, d->m_pPut_buf_user ) )
+					return ( d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED );
+			}
+			else if ( pOutput_buf_start == d->m_output_buf )
+			{
+				int bytes_to_copy = (int)MZ_MIN( (size_t)n, (size_t)( *d->m_pOut_buf_size - d->m_out_buf_ofs ) );
+				memcpy( (mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy );
+				d->m_out_buf_ofs += bytes_to_copy;
+				if ( ( n -= bytes_to_copy ) != 0 )
+				{
+					// User buffer full: remember the leftover for later.
+					d->m_output_flush_ofs = bytes_to_copy;
+					d->m_output_flush_remaining = n;
+				}
+			}
+			else
+			{
+				d->m_out_buf_ofs += n;
+			}
+		}
+
+		return d->m_output_flush_remaining;
+	}
+
+// Hash-chain match finder: walks d->m_next[] from 'pos' looking for the
+// longest match within 'max_dist', updating *pMatch_dist/*pMatch_len only on
+// improvement. Probe count is limited by d->m_max_probes[] (fewer once a
+// match of >= 32 bytes is already known).
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p)
+	// Fast variant: compares 16 bits at a time via unaligned loads.
+	static MZ_FORCEINLINE void tdefl_find_match( tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len )
+	{
+		mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
+		mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
+		const mz_uint16 *s = (const mz_uint16*)( d->m_dict + pos ), *p, *q;
+		// c01 = the two bytes straddling the end of the current best match —
+		// a cheap reject test a candidate must pass before a full compare.
+		mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD( &d->m_dict[pos + match_len - 1] ), s01 = TDEFL_READ_UNALIGNED_WORD( s );
+		MZ_ASSERT( max_match_len <= TDEFL_MAX_MATCH_LEN ); if ( max_match_len <= match_len ) return;
+		for ( ; ; )
+		{
+			for ( ; ; )
+			{
+				if ( --num_probes_left == 0 ) return;
+// Advance one link along the hash chain; give up when the chain ends or the
+// candidate is farther than max_dist; accept it when the tail bytes match.
+#define TDEFL_PROBE \
+        next_probe_pos = d->m_next[probe_pos]; \
+        if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \
+        probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \
+        if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break;
+				TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE;
+			}
+			if ( !dist ) break; q = (const mz_uint16*)( d->m_dict + probe_pos ); if ( TDEFL_READ_UNALIGNED_WORD( q ) != s01 ) continue; p = s; probe_len = 32;
+			// Compare 8 bytes per iteration (4 x 16-bit words), at most
+			// 32 iterations = TDEFL_MAX_MATCH_LEN-ish span.
+			do {} while ( ( TDEFL_READ_UNALIGNED_WORD( ++p ) == TDEFL_READ_UNALIGNED_WORD( ++q ) ) && ( TDEFL_READ_UNALIGNED_WORD( ++p ) == TDEFL_READ_UNALIGNED_WORD( ++q ) ) &&
+				( TDEFL_READ_UNALIGNED_WORD( ++p ) == TDEFL_READ_UNALIGNED_WORD( ++q ) ) && ( TDEFL_READ_UNALIGNED_WORD( ++p ) == TDEFL_READ_UNALIGNED_WORD( ++q ) ) && ( --probe_len > 0 ) );
+			if ( !probe_len )
+			{
+				// Ran the full compare without a mismatch: maximal match.
+				*pMatch_dist = dist; *pMatch_len = MZ_MIN( max_match_len, TDEFL_MAX_MATCH_LEN ); break;
+			}
+			else if ( ( probe_len = ( (mz_uint)( p - s ) * 2 ) + (mz_uint)( *(const mz_uint8*)p == *(const mz_uint8*)q ) ) > match_len )
+			{
+				*pMatch_dist = dist; if ( ( *pMatch_len = match_len = MZ_MIN( max_match_len, probe_len ) ) == max_match_len ) break;
+				c01 = TDEFL_READ_UNALIGNED_WORD( &d->m_dict[pos + match_len - 1] );
+			}
+		}
+	}
+#else
+	// Portable byte-at-a-time variant; same contract as above.
+	static MZ_FORCEINLINE void tdefl_find_match( tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len )
+	{
+		mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
+		mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
+		const mz_uint8 *s = d->m_dict + pos, *p, *q;
+		// c0/c1 bracket the end of the current best match (cheap reject test).
+		mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1];
+		MZ_ASSERT( max_match_len <= TDEFL_MAX_MATCH_LEN ); if ( max_match_len <= match_len ) return;
+		for ( ; ; )
+		{
+			for ( ; ; )
+			{
+				if ( --num_probes_left == 0 ) return;
+#define TDEFL_PROBE \
+        next_probe_pos = d->m_next[probe_pos]; \
+        if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \
+        probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \
+        if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break;
+				TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE;
+			}
+			if ( !dist ) break; p = s; q = d->m_dict + probe_pos; for ( probe_len = 0; probe_len < max_match_len; probe_len++ ) if ( *p++ != *q++ ) break;
+			if ( probe_len > match_len )
+			{
+				*pMatch_dist = dist; if ( ( *pMatch_len = match_len = probe_len ) == max_match_len ) return;
+				c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1];
+			}
+		}
+	}
+#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+	static mz_bool tdefl_compress_fast( tdefl_compressor *d )
+	{
+		// Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio.
+		// Works on local copies of the compressor state; the copies are synced
+		// back to 'd' around every tdefl_flush_block() call and on exit.
+		mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left;
+		mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags;
+		mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
+
+		while ( ( d->m_src_buf_left ) || ( ( d->m_flush ) && ( lookahead_size ) ) )
+		{
+			const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096;
+			mz_uint dst_pos = ( lookahead_pos + lookahead_size ) & TDEFL_LZ_DICT_SIZE_MASK;
+			mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size );
+			d->m_src_buf_left -= num_bytes_to_process;
+			lookahead_size += num_bytes_to_process;
+
+			// Copy input into the circular dictionary; the first
+			// TDEFL_MAX_MATCH_LEN-1 bytes are mirrored past the end so
+			// matches can read without wrapping.
+			while ( num_bytes_to_process )
+			{
+				mz_uint32 n = MZ_MIN( TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process );
+				memcpy( d->m_dict + dst_pos, d->m_pSrc, n );
+				if ( dst_pos < ( TDEFL_MAX_MATCH_LEN - 1 ) )
+					memcpy( d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN( n, ( TDEFL_MAX_MATCH_LEN - 1 ) - dst_pos ) );
+				d->m_pSrc += n;
+				dst_pos = ( dst_pos + n ) & TDEFL_LZ_DICT_SIZE_MASK;
+				num_bytes_to_process -= n;
+			}
+
+			dict_size = MZ_MIN( TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size );
+			if ( ( !d->m_flush ) && ( lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE ) ) break;
+
+			while ( lookahead_size >= 4 )
+			{
+				mz_uint cur_match_dist, cur_match_len = 1;
+				mz_uint8 *pCur_dict = d->m_dict + cur_pos;
+				// Hash the next 3 bytes into the level-1 hash table; a single
+				// candidate position is probed per trigram (no chains).
+				mz_uint first_trigram = ( *(const mz_uint32 *)pCur_dict ) & 0xFFFFFF;
+				mz_uint hash = ( first_trigram ^ ( first_trigram >> ( 24 - ( TDEFL_LZ_HASH_BITS - 8 ) ) ) ) & TDEFL_LEVEL1_HASH_SIZE_MASK;
+				mz_uint probe_pos = d->m_hash[hash];
+				d->m_hash[hash] = (mz_uint16)lookahead_pos;
+
+				if ( ( ( cur_match_dist = (mz_uint16)( lookahead_pos - probe_pos ) ) <= dict_size ) && ( ( *(const mz_uint32 *)( d->m_dict + ( probe_pos &= TDEFL_LZ_DICT_SIZE_MASK ) ) & 0xFFFFFF ) == first_trigram ) )
+				{
+					// Trigram hit: extend the match 16 bits at a time.
+					const mz_uint16 *p = (const mz_uint16 *)pCur_dict;
+					const mz_uint16 *q = (const mz_uint16 *)( d->m_dict + probe_pos );
+					mz_uint32 probe_len = 32;
+					do {} while ( ( TDEFL_READ_UNALIGNED_WORD( ++p ) == TDEFL_READ_UNALIGNED_WORD( ++q ) ) && ( TDEFL_READ_UNALIGNED_WORD( ++p ) == TDEFL_READ_UNALIGNED_WORD( ++q ) ) &&
+						( TDEFL_READ_UNALIGNED_WORD( ++p ) == TDEFL_READ_UNALIGNED_WORD( ++q ) ) && ( TDEFL_READ_UNALIGNED_WORD( ++p ) == TDEFL_READ_UNALIGNED_WORD( ++q ) ) && ( --probe_len > 0 ) );
+					cur_match_len = ( (mz_uint)( p - (const mz_uint16 *)pCur_dict ) * 2 ) + (mz_uint)( *(const mz_uint8 *)p == *(const mz_uint8 *)q );
+					if ( !probe_len )
+						cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0;
+
+					// Reject tiny matches, and 3-byte matches that are far
+					// away (they cost more bits than 3 literals).
+					if ( ( cur_match_len < TDEFL_MIN_MATCH_LEN ) || ( ( cur_match_len == TDEFL_MIN_MATCH_LEN ) && ( cur_match_dist >= 8U * 1024U ) ) )
+					{
+						cur_match_len = 1;
+						*pLZ_code_buf++ = (mz_uint8)first_trigram;
+						*pLZ_flags = (mz_uint8)( *pLZ_flags >> 1 );
+						d->m_huff_count[0][(mz_uint8)first_trigram]++;
+					}
+					else
+					{
+						mz_uint32 s0, s1;
+						cur_match_len = MZ_MIN( cur_match_len, lookahead_size );
+
+						MZ_ASSERT( ( cur_match_len >= TDEFL_MIN_MATCH_LEN ) && ( cur_match_dist >= 1 ) && ( cur_match_dist <= TDEFL_LZ_DICT_SIZE ) );
+
+						cur_match_dist--;
+
+						// LZ record: len-3 byte then 16-bit dist-1, flag bit set.
+						pLZ_code_buf[0] = (mz_uint8)( cur_match_len - TDEFL_MIN_MATCH_LEN );
+						*(mz_uint16 *)( &pLZ_code_buf[1] ) = (mz_uint16)cur_match_dist;
+						pLZ_code_buf += 3;
+						*pLZ_flags = (mz_uint8)( ( *pLZ_flags >> 1 ) | 0x80 );
+
+						s0 = s_tdefl_small_dist_sym[cur_match_dist & 511];
+						s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8];
+						d->m_huff_count[1][( cur_match_dist < 512 ) ? s0 : s1]++;
+
+						d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++;
+					}
+				}
+				else
+				{
+					// Trigram miss: emit a literal.
+					*pLZ_code_buf++ = (mz_uint8)first_trigram;
+					*pLZ_flags = (mz_uint8)( *pLZ_flags >> 1 );
+					d->m_huff_count[0][(mz_uint8)first_trigram]++;
+				}
+
+				if ( --num_flags_left == 0 ) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; }
+
+				total_lz_bytes += cur_match_len;
+				lookahead_pos += cur_match_len;
+				dict_size = MZ_MIN( dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE );
+				cur_pos = ( cur_pos + cur_match_len ) & TDEFL_LZ_DICT_SIZE_MASK;
+				MZ_ASSERT( lookahead_size >= cur_match_len );
+				lookahead_size -= cur_match_len;
+
+				// Code buffer nearly full: sync state back and flush a block.
+				if ( pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8] )
+				{
+					int n;
+					d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size;
+					d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
+					if ( ( n = tdefl_flush_block( d, 0 ) ) != 0 )
+						return ( n < 0 ) ? MZ_FALSE : MZ_TRUE;
+					total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left;
+				}
+			}
+
+			// Fewer than 4 bytes left in the lookahead: emit them as literals.
+			while ( lookahead_size )
+			{
+				mz_uint8 lit = d->m_dict[cur_pos];
+
+				total_lz_bytes++;
+				*pLZ_code_buf++ = lit;
+				*pLZ_flags = (mz_uint8)( *pLZ_flags >> 1 );
+				if ( --num_flags_left == 0 ) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; }
+
+				d->m_huff_count[0][lit]++;
+
+				lookahead_pos++;
+				dict_size = MZ_MIN( dict_size + 1, TDEFL_LZ_DICT_SIZE );
+				cur_pos = ( cur_pos + 1 ) & TDEFL_LZ_DICT_SIZE_MASK;
+				lookahead_size--;
+
+				if ( pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8] )
+				{
+					int n;
+					d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size;
+					d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
+					if ( ( n = tdefl_flush_block( d, 0 ) ) != 0 )
+						return ( n < 0 ) ? MZ_FALSE : MZ_TRUE;
+					total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left;
+				}
+			}
+		}
+
+		d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size;
+		d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
+		return MZ_TRUE;
+	}
+#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+
+	// Appends one literal byte to the LZ code buffer (flag bit clear) and
+	// bumps its frequency in the lit/len histogram.
+	static MZ_FORCEINLINE void tdefl_record_literal( tdefl_compressor *d, mz_uint8 lit )
+	{
+		d->m_total_lz_bytes++;
+		*d->m_pLZ_code_buf++ = lit;
+		*d->m_pLZ_flags = (mz_uint8)( *d->m_pLZ_flags >> 1 ); if ( --d->m_num_flags_left == 0 ) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; }
+		d->m_huff_count[0][lit]++;
+	}
+
+	// Appends one match to the LZ code buffer as 3 bytes — (len - 3), then
+	// (dist - 1) little-endian — with the flag bit set, and updates the
+	// length and distance histograms.
+	static MZ_FORCEINLINE void tdefl_record_match( tdefl_compressor *d, mz_uint match_len, mz_uint match_dist )
+	{
+		mz_uint32 s0, s1;
+
+		MZ_ASSERT( ( match_len >= TDEFL_MIN_MATCH_LEN ) && ( match_dist >= 1 ) && ( match_dist <= TDEFL_LZ_DICT_SIZE ) );
+
+		d->m_total_lz_bytes += match_len;
+
+		d->m_pLZ_code_buf[0] = (mz_uint8)( match_len - TDEFL_MIN_MATCH_LEN );
+
+		match_dist -= 1;
+		d->m_pLZ_code_buf[1] = (mz_uint8)( match_dist & 0xFF );
+		d->m_pLZ_code_buf[2] = (mz_uint8)( match_dist >> 8 ); d->m_pLZ_code_buf += 3;
+
+		*d->m_pLZ_flags = (mz_uint8)( ( *d->m_pLZ_flags >> 1 ) | 0x80 ); if ( --d->m_num_flags_left == 0 ) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; }
+
+		s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[( match_dist >> 8 ) & 127];
+		d->m_huff_count[1][( match_dist < 512 ) ? s0 : s1]++;
+
+		// Always true given the assert above; kept as a branchless-unfriendly
+		// guard from upstream.
+		if ( match_len >= TDEFL_MIN_MATCH_LEN ) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++;
+	}
+
+	// Reference (non-fast-path) compressor: maintains the sliding dictionary and
+	// hash chains, then runs a greedy or one-step-lazy parser, emitting literals
+	// and matches via tdefl_record_*. Returns MZ_FALSE only when an intermediate
+	// tdefl_flush_block() fails; MZ_TRUE otherwise.
+	static mz_bool tdefl_compress_normal( tdefl_compressor *d )
+	{
+		const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left;
+		tdefl_flush flush = d->m_flush;
+
+		// Consume input while any remains; when flushing, also drain the lookahead.
+		while ( ( src_buf_left ) || ( ( flush ) && ( d->m_lookahead_size ) ) )
+		{
+			mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos;
+			// Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN.
+			if ( ( d->m_lookahead_size + d->m_dict_size ) >= ( TDEFL_MIN_MATCH_LEN - 1 ) )
+			{
+				mz_uint dst_pos = ( d->m_lookahead_pos + d->m_lookahead_size ) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
+				mz_uint hash = ( d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT ) ^ d->m_dict[( ins_pos + 1 ) & TDEFL_LZ_DICT_SIZE_MASK];
+				mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size );
+				const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process;
+				src_buf_left -= num_bytes_to_process;
+				d->m_lookahead_size += num_bytes_to_process;
+				while ( pSrc != pSrc_end )
+				{
+					// The low TDEFL_MAX_MATCH_LEN-1 bytes are mirrored past the dict end
+					// so matches can run off the wrap point without masking every read.
+					mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if ( dst_pos < ( TDEFL_MAX_MATCH_LEN - 1 ) ) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
+					hash = ( ( hash << TDEFL_LZ_HASH_SHIFT ) ^ c ) & ( TDEFL_LZ_HASH_SIZE - 1 );
+					d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)( ins_pos );
+					dst_pos = ( dst_pos + 1 ) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++;
+				}
+			}
+			else
+			{
+				// Not enough data yet for the rolling hash; insert bytes one at a time.
+				while ( ( src_buf_left ) && ( d->m_lookahead_size < TDEFL_MAX_MATCH_LEN ) )
+				{
+					mz_uint8 c = *pSrc++;
+					mz_uint dst_pos = ( d->m_lookahead_pos + d->m_lookahead_size ) & TDEFL_LZ_DICT_SIZE_MASK;
+					src_buf_left--;
+					d->m_dict[dst_pos] = c;
+					if ( dst_pos < ( TDEFL_MAX_MATCH_LEN - 1 ) )
+						d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
+					if ( ( ++d->m_lookahead_size + d->m_dict_size ) >= TDEFL_MIN_MATCH_LEN )
+					{
+						mz_uint ins_pos = d->m_lookahead_pos + ( d->m_lookahead_size - 1 ) - 2;
+						mz_uint hash = ( ( d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << ( TDEFL_LZ_HASH_SHIFT * 2 ) ) ^ ( d->m_dict[( ins_pos + 1 ) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT ) ^ c ) & ( TDEFL_LZ_HASH_SIZE - 1 );
+						d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)( ins_pos );
+					}
+				}
+			}
+			d->m_dict_size = MZ_MIN( TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size );
+			// Without a flush request, wait for a full-length lookahead before parsing.
+			if ( ( !flush ) && ( d->m_lookahead_size < TDEFL_MAX_MATCH_LEN ) )
+				break;
+
+			// Simple lazy/greedy parsing state machine.
+			len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : ( TDEFL_MIN_MATCH_LEN - 1 ); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
+			// RLE/raw-only modes: only consider runs of the previous byte (distance-1 matches).
+			if ( d->m_flags & ( TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS ) )
+			{
+				if ( ( d->m_dict_size ) && ( !( d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS ) ) )
+				{
+					mz_uint8 c = d->m_dict[( cur_pos - 1 ) & TDEFL_LZ_DICT_SIZE_MASK];
+					cur_match_len = 0; while ( cur_match_len < d->m_lookahead_size ) { if ( d->m_dict[cur_pos + cur_match_len] != c ) break; cur_match_len++; }
+					if ( cur_match_len < TDEFL_MIN_MATCH_LEN ) cur_match_len = 0; else cur_match_dist = 1;
+				}
+			}
+			else
+			{
+				tdefl_find_match( d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len );
+			}
+			// Reject marginal matches: minimum-length matches at far distances, and
+			// short matches in filtered mode.
+			if ( ( ( cur_match_len == TDEFL_MIN_MATCH_LEN ) && ( cur_match_dist >= 8U * 1024U ) ) || ( cur_pos == cur_match_dist ) || ( ( d->m_flags & TDEFL_FILTER_MATCHES ) && ( cur_match_len <= 5 ) ) )
+			{
+				cur_match_dist = cur_match_len = 0;
+			}
+			// Lazy parsing: a match was deferred on the previous step — emit whichever
+			// of the saved/current candidates wins.
+			if ( d->m_saved_match_len )
+			{
+				if ( cur_match_len > d->m_saved_match_len )
+				{
+					tdefl_record_literal( d, (mz_uint8)d->m_saved_lit );
+					if ( cur_match_len >= 128 )
+					{
+						tdefl_record_match( d, cur_match_len, cur_match_dist );
+						d->m_saved_match_len = 0; len_to_move = cur_match_len;
+					}
+					else
+					{
+						d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len;
+					}
+				}
+				else
+				{
+					tdefl_record_match( d, d->m_saved_match_len, d->m_saved_match_dist );
+					len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0;
+				}
+			}
+			else if ( !cur_match_dist )
+				tdefl_record_literal( d, d->m_dict[MZ_MIN( cur_pos, sizeof( d->m_dict ) - 1 )] );
+			else if ( ( d->m_greedy_parsing ) || ( d->m_flags & TDEFL_RLE_MATCHES ) || ( cur_match_len >= 128 ) )
+			{
+				tdefl_record_match( d, cur_match_len, cur_match_dist );
+				len_to_move = cur_match_len;
+			}
+			else
+			{
+				// Defer: remember this match and see if the next position does better.
+				d->m_saved_lit = d->m_dict[MZ_MIN( cur_pos, sizeof( d->m_dict ) - 1 )]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len;
+			}
+			// Move the lookahead forward by len_to_move bytes.
+			d->m_lookahead_pos += len_to_move;
+			MZ_ASSERT( d->m_lookahead_size >= len_to_move );
+			d->m_lookahead_size -= len_to_move;
+			d->m_dict_size = MZ_MIN( d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE );
+			// Check if it's time to flush the current LZ codes to the internal output buffer.
+			if ( ( d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8] ) ||
+				( ( d->m_total_lz_bytes > 31 * 1024 ) && ( ( ( ( (mz_uint)( d->m_pLZ_code_buf - d->m_lz_code_buf ) * 115 ) >> 7 ) >= d->m_total_lz_bytes ) || ( d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS ) ) ) )
+			{
+				int n;
+				d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left;
+				if ( ( n = tdefl_flush_block( d, 0 ) ) != 0 )
+					return ( n < 0 ) ? MZ_FALSE : MZ_TRUE;
+			}
+		}
+
+		d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left;
+		return MZ_TRUE;
+	}
+
+	// Copy as much pending output as fits from the internal output buffer into the
+	// caller's output buffer, and report bytes consumed/produced through the
+	// size pointers. Returns DONE only when compression has finished and nothing
+	// remains to drain; otherwise OKAY.
+	static tdefl_status tdefl_flush_output_buffer( tdefl_compressor *d )
+	{
+		if ( d->m_pIn_buf_size )
+		{
+			*d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
+		}
+
+		if ( d->m_pOut_buf_size )
+		{
+			// Bounded by both the caller's remaining space and our pending bytes.
+			size_t n = MZ_MIN( *d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining );
+			memcpy( (mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n );
+			d->m_output_flush_ofs += (mz_uint)n;
+			d->m_output_flush_remaining -= (mz_uint)n;
+			d->m_out_buf_ofs += n;
+
+			*d->m_pOut_buf_size = d->m_out_buf_ofs;
+		}
+
+		return ( d->m_finished && !d->m_output_flush_remaining ) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY;
+	}
+
+	// Main low-level streaming entry point. On input, *pIn_buf_size/*pOut_buf_size
+	// hold the buffer sizes; on return they hold the bytes actually consumed and
+	// produced. Validates parameters, resumes any pending output flush, compresses
+	// via the fast or normal path, updates the running adler32, and — once all
+	// input is consumed under a flush request — emits the flush/final block.
+	tdefl_status tdefl_compress( tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush )
+	{
+		if ( !d )
+		{
+			if ( pIn_buf_size ) *pIn_buf_size = 0;
+			if ( pOut_buf_size ) *pOut_buf_size = 0;
+			return TDEFL_STATUS_BAD_PARAM;
+		}
+
+		d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size;
+		d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size;
+		d->m_pSrc = (const mz_uint8 *)( pIn_buf ); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0;
+		d->m_out_buf_ofs = 0;
+		d->m_flush = flush;
+
+		// Exactly one output mechanism is allowed: the put-buf callback installed by
+		// tdefl_init() XOR an output buffer passed here. Also reject calls after a
+		// prior error, non-FINISH calls after FINISH, and null buffers with nonzero sizes.
+		if ( ( ( d->m_pPut_buf_func != NULL ) == ( ( pOut_buf != NULL ) || ( pOut_buf_size != NULL ) ) ) || ( d->m_prev_return_status != TDEFL_STATUS_OKAY ) ||
+			( d->m_wants_to_finish && ( flush != TDEFL_FINISH ) ) || ( pIn_buf_size && *pIn_buf_size && !pIn_buf ) || ( pOut_buf_size && *pOut_buf_size && !pOut_buf ) )
+		{
+			if ( pIn_buf_size ) *pIn_buf_size = 0;
+			if ( pOut_buf_size ) *pOut_buf_size = 0;
+			return ( d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM );
+		}
+		d->m_wants_to_finish |= ( flush == TDEFL_FINISH );
+
+		// Drain previously produced output before compressing any more input.
+		if ( ( d->m_output_flush_remaining ) || ( d->m_finished ) )
+			return ( d->m_prev_return_status = tdefl_flush_output_buffer( d ) );
+
+#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+		// Fast path: single-probe greedy parsing with no filter/raw/RLE modes.
+		if ( ( ( d->m_flags & TDEFL_MAX_PROBES_MASK ) == 1 ) &&
+			( ( d->m_flags & TDEFL_GREEDY_PARSING_FLAG ) != 0 ) &&
+			( ( d->m_flags & ( TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES ) ) == 0 ) )
+		{
+			if ( !tdefl_compress_fast( d ) )
+				return d->m_prev_return_status;
+		}
+		else
+#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
+		{
+			if ( !tdefl_compress_normal( d ) )
+				return d->m_prev_return_status;
+		}
+
+		// Maintain the adler32 of the source bytes consumed so far (zlib checksum).
+		if ( ( d->m_flags & ( TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32 ) ) && ( pIn_buf ) )
+			d->m_adler32 = (mz_uint32)mz_adler32( d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf );
+
+		if ( ( flush ) && ( !d->m_lookahead_size ) && ( !d->m_src_buf_left ) && ( !d->m_output_flush_remaining ) )
+		{
+			if ( tdefl_flush_block( d, flush ) < 0 )
+				return d->m_prev_return_status;
+			d->m_finished = ( flush == TDEFL_FINISH );
+			// A full flush also resets the hash chains and dictionary size.
+			if ( flush == TDEFL_FULL_FLUSH ) { MZ_CLEAR_OBJ( d->m_hash ); MZ_CLEAR_OBJ( d->m_next ); d->m_dict_size = 0; }
+		}
+
+		return ( d->m_prev_return_status = tdefl_flush_output_buffer( d ) );
+	}
+
+	// Push-style compression: output goes through the put-buf callback installed
+	// at tdefl_init() time (required), so no output buffer is passed here.
+	tdefl_status tdefl_compress_buffer( tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush )
+	{
+		MZ_ASSERT( d->m_pPut_buf_func ); return tdefl_compress( d, pIn_buf, &in_buf_size, NULL, NULL, flush );
+	}
+
+	// Reset/initialize all compressor state; call before first use and before
+	// reusing a compressor. The hash table is cleared only when deterministic
+	// parsing is requested (TDEFL_NONDETERMINISTIC_PARSING_FLAG unset); the
+	// dictionary bytes themselves are never cleared here.
+	tdefl_status tdefl_init( tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags )
+	{
+		d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user;
+		// The low 12 bits of flags encode the max probe count; derive both probe limits.
+		d->m_flags = (mz_uint)( flags ); d->m_max_probes[0] = 1 + ( ( flags & 0xFFF ) + 2 ) / 3; d->m_greedy_parsing = ( flags & TDEFL_GREEDY_PARSING_FLAG ) != 0;
+		d->m_max_probes[1] = 1 + ( ( ( flags & 0xFFF ) >> 2 ) + 2 ) / 3;
+		if ( !( flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG ) ) MZ_CLEAR_OBJ( d->m_hash );
+		d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0;
+		d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0;
+		// The first code-buffer byte is reserved as the first flag byte.
+		d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8;
+		d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY;
+		d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1;
+		d->m_pIn_buf = NULL; d->m_pOut_buf = NULL;
+		d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL;
+		d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0;
+		// Zero the literal/length and distance symbol frequency tables.
+		memset( &d->m_huff_count[0][0], 0, sizeof( d->m_huff_count[0][0] ) * TDEFL_MAX_HUFF_SYMBOLS_0 );
+		memset( &d->m_huff_count[1][0], 0, sizeof( d->m_huff_count[1][0] ) * TDEFL_MAX_HUFF_SYMBOLS_1 );
+		return TDEFL_STATUS_OKAY;
+	}
+
+	// Accessor: status returned by the most recent tdefl_compress() call.
+	tdefl_status tdefl_get_prev_return_status( tdefl_compressor *d )
+	{
+		return d->m_prev_return_status;
+	}
+
+	// Accessor: adler32 of the input consumed so far. Only maintained when
+	// TDEFL_WRITE_ZLIB_HEADER or TDEFL_COMPUTE_ADLER32 is set in the flags.
+	mz_uint32 tdefl_get_adler32( tdefl_compressor *d )
+	{
+		return d->m_adler32;
+	}
+
+	// Compress an in-memory buffer in one shot, delivering output through
+	// pPut_buf_func. Heap-allocates a temporary compressor; returns MZ_TRUE on success.
+	mz_bool tdefl_compress_mem_to_output( const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags )
+	{
+		tdefl_compressor *pComp; mz_bool succeeded; if ( ( ( buf_len ) && ( !pBuf ) ) || ( !pPut_buf_func ) ) return MZ_FALSE;
+		pComp = (tdefl_compressor*)MZ_MALLOC( sizeof( tdefl_compressor ) ); if ( !pComp ) return MZ_FALSE;
+		succeeded = ( tdefl_init( pComp, pPut_buf_func, pPut_buf_user, flags ) == TDEFL_STATUS_OKAY );
+		succeeded = succeeded && ( tdefl_compress_buffer( pComp, pBuf, buf_len, TDEFL_FINISH ) == TDEFL_STATUS_DONE );
+		MZ_FREE( pComp ); return succeeded;
+	}
+
+	// Byte sink used by the mem-to-heap/mem-to-mem helpers below: growable when
+	// m_expandable is set, otherwise a fixed caller-provided buffer.
+	typedef struct
+	{
+		size_t m_size, m_capacity;
+		mz_uint8 *m_pBuf;
+		mz_bool m_expandable;
+	} tdefl_output_buffer;
+
+	// tdefl put-buf callback that appends to a tdefl_output_buffer. Grows the
+	// buffer geometrically (doubling, min 128 bytes) when expandable; returns
+	// MZ_FALSE when a fixed buffer would overflow or realloc fails.
+	static mz_bool tdefl_output_buffer_putter( const void *pBuf, int len, void *pUser )
+	{
+		tdefl_output_buffer *p = (tdefl_output_buffer *)pUser;
+		size_t new_size = p->m_size + len;
+		if ( new_size > p->m_capacity )
+		{
+			size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if ( !p->m_expandable ) return MZ_FALSE;
+			do { new_capacity = MZ_MAX( 128U, new_capacity << 1U ); } while ( new_size > new_capacity );
+			pNew_buf = (mz_uint8*)MZ_REALLOC( p->m_pBuf, new_capacity ); if ( !pNew_buf ) return MZ_FALSE;
+			p->m_pBuf = pNew_buf; p->m_capacity = new_capacity;
+		}
+		memcpy( (mz_uint8*)p->m_pBuf + p->m_size, pBuf, len ); p->m_size = new_size;
+		return MZ_TRUE;
+	}
+
+	// Compress to a newly heap-allocated buffer that the caller must MZ_FREE();
+	// *pOut_len receives the compressed size. Returns NULL on failure.
+	void *tdefl_compress_mem_to_heap( const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags )
+	{
+		tdefl_output_buffer out_buf; MZ_CLEAR_OBJ( out_buf );
+		// NOTE(review): MZ_FALSE (0) doubles as NULL here since the return type is void*.
+		if ( !pOut_len ) return MZ_FALSE; else *pOut_len = 0;
+		out_buf.m_expandable = MZ_TRUE;
+		if ( !tdefl_compress_mem_to_output( pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags ) ) return NULL;
+		*pOut_len = out_buf.m_size; return out_buf.m_pBuf;
+	}
+
+	// Compress into a caller-provided fixed buffer. Returns the compressed size,
+	// or 0 if the output did not fit or another error occurred.
+	size_t tdefl_compress_mem_to_mem( void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags )
+	{
+		tdefl_output_buffer out_buf; MZ_CLEAR_OBJ( out_buf );
+		if ( !pOut_buf ) return 0;
+		// m_expandable stays false, so the putter fails rather than growing the buffer.
+		out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len;
+		if ( !tdefl_compress_mem_to_output( pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags ) ) return 0;
+		return out_buf.m_size;
+	}
+
+#ifndef MINIZ_NO_ZLIB_APIS
+	// Probe counts per compression level 0-10 (index = level).
+	static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32,  16, 32, 128, 256,  512, 768, 1500 };
+
+	// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files).
+	// Translate zlib-style (level, window_bits, strategy) into tdefl flag bits.
+	mz_uint tdefl_create_comp_flags_from_zip_params( int level, int window_bits, int strategy )
+	{
+		mz_uint comp_flags = s_tdefl_num_probes[( level >= 0 ) ? MZ_MIN( 10, level ) : MZ_DEFAULT_LEVEL] | ( ( level <= 3 ) ? TDEFL_GREEDY_PARSING_FLAG : 0 );
+		// Only a positive window_bits requests the zlib header wrapper.
+		if ( window_bits > 0 ) comp_flags |= TDEFL_WRITE_ZLIB_HEADER;
+
+		if ( !level ) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS;
+		else if ( strategy == MZ_FILTERED ) comp_flags |= TDEFL_FILTER_MATCHES;
+		else if ( strategy == MZ_HUFFMAN_ONLY ) comp_flags &= ~TDEFL_MAX_PROBES_MASK;
+		else if ( strategy == MZ_FIXED ) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS;
+		else if ( strategy == MZ_RLE ) comp_flags |= TDEFL_RLE_MATCHES;
+
+		return comp_flags;
+	}
+#endif //MINIZ_NO_ZLIB_APIS
+
+#ifdef _MSC_VER
+#pragma warning (push)
+#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal)
+#endif
+
+	// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at
+	// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/.
+	// This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck.
+	// Encode a raw interleaved 8-bit image as a complete in-memory PNG file.
+	// Returns a malloc'd buffer (caller frees) and sets *pLen_out to its size;
+	// NULL on failure. num_chans selects the PNG color type via chans[] below.
+	// NOTE(review): num_chans indexes chans[5] unchecked, and w*num_chans / bpl*h
+	// are int-width products — confirm callers validate dimensions.
+	void *tdefl_write_image_to_png_file_in_memory_ex( const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip )
+	{
+		// Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined.
+		static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32,  16, 32, 128, 256,  512, 768, 1500 };
+		tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC( sizeof( tdefl_compressor ) ); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0;
+		if ( !pComp ) return NULL;
+		MZ_CLEAR_OBJ( out_buf ); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57 + MZ_MAX( 64, ( 1 + bpl )*h ); if ( NULL == ( out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC( out_buf.m_capacity ) ) ) { MZ_FREE( pComp ); return NULL; }
+		// write dummy header (41 placeholder bytes, each copied from the first byte
+		// of int z — the low byte on little-endian; overwritten with the real header below)
+		for ( z = 41; z; --z ) tdefl_output_buffer_putter( &z, 1, &out_buf );
+		// compress image data: each scanline is prefixed with filter byte 0 ("none"),
+		// then deflated with a zlib wrapper into the IDAT payload
+		tdefl_init( pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN( 10, level )] | TDEFL_WRITE_ZLIB_HEADER );
+		for ( y = 0; y < h; ++y ) { tdefl_compress_buffer( pComp, &z, 1, TDEFL_NO_FLUSH ); tdefl_compress_buffer( pComp, (mz_uint8*)pImage + ( flip ? ( h - 1 - y ) : y ) * bpl, bpl, TDEFL_NO_FLUSH ); }
+		if ( tdefl_compress_buffer( pComp, NULL, 0, TDEFL_FINISH ) != TDEFL_STATUS_DONE ) { MZ_FREE( pComp ); MZ_FREE( out_buf.m_pBuf ); return NULL; }
+		// write real header: PNG signature + IHDR (with its CRC-32) + IDAT length/type
+		*pLen_out = out_buf.m_size - 41;
+		{
+			static const mz_uint8 chans[] = { 0x00, 0x00, 0x04, 0x02, 0x06 };
+			mz_uint8 pnghdr[41] = { 0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52,
+				0,0,(mz_uint8)( w >> 8 ),(mz_uint8)w,0,0,(mz_uint8)( h >> 8 ),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0,
+				(mz_uint8)( *pLen_out >> 24 ),(mz_uint8)( *pLen_out >> 16 ),(mz_uint8)( *pLen_out >> 8 ),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54 };
+			c = (mz_uint32)mz_crc32( MZ_CRC32_INIT, pnghdr + 12, 17 ); for ( i = 0; i < 4; ++i, c <<= 8 ) ( (mz_uint8*)( pnghdr + 29 ) )[i] = (mz_uint8)( c >> 24 );
+			memcpy( out_buf.m_pBuf, pnghdr, 41 );
+		}
+		// write footer (IDAT CRC-32, followed by IEND chunk)
+		if ( !tdefl_output_buffer_putter( "\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf ) ) { *pLen_out = 0; MZ_FREE( pComp ); MZ_FREE( out_buf.m_pBuf ); return NULL; }
+		// patch the IDAT CRC-32 (over the chunk type + payload) into the first 4 footer bytes
+		c = (mz_uint32)mz_crc32( MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, *pLen_out + 4 ); for ( i = 0; i < 4; ++i, c <<= 8 ) ( out_buf.m_pBuf + out_buf.m_size - 16 )[i] = (mz_uint8)( c >> 24 );
+		// compute final size of file, grab compressed data buffer and return
+		*pLen_out += 57; MZ_FREE( pComp ); return out_buf.m_pBuf;
+	}
+	// Convenience wrapper: compression level 6, no vertical flip.
+	void *tdefl_write_image_to_png_file_in_memory( const void *pImage, int w, int h, int num_chans, size_t *pLen_out )
+	{
+		// Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out)
+		return tdefl_write_image_to_png_file_in_memory_ex( pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE );
+	}
+
+#ifdef _MSC_VER
+#pragma warning (pop)
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+// Inflate source_buf into a heap-allocated buffer (caller frees); *out_len
+// receives the decompressed size. parse_header selects zlib-wrapped input
+// versus raw deflate.
+void * nv::miniz_decompress( const void *source_buf, size_t source_buf_len, size_t *out_len, bool parse_header )
+{
+	NV_PROFILE( "decompress" );
+	return tinfl_decompress_mem_to_heap( source_buf, source_buf_len, out_len, parse_header ? TINFL_FLAG_PARSE_ZLIB_HEADER : 0 );
+}
Index: trunk/src/image/png_loader.cc
===================================================================
--- trunk/src/image/png_loader.cc	(revision 484)
+++ trunk/src/image/png_loader.cc	(revision 484)
@@ -0,0 +1,1083 @@
+// Copyright (C) 2015-2015 ChaosForge Ltd
+// http://chaosforge.org/
+//
+// This file is part of Nova libraries. 
+// For conditions of distribution and use, see copying.txt file in root folder.
+
+#include "nv/image/png_loader.hh"
+
+#include "nv/image/miniz.hh"
+
+using namespace nv;
+
+// Requested channel count codes (used as req_comp): 0 = keep the file's native count.
+enum
+{
+	STBI_default = 0, // only used for req_comp
+
+	STBI_grey = 1,
+	STBI_grey_alpha = 2,
+	STBI_rgb = 3,
+	STBI_rgb_alpha = 4
+};
+
+// User-supplied stream callbacks for the callback-based stbi__context constructor.
+typedef struct
+{
+	int( *read )  ( void *user, char *data, int size );   // fill 'data' with 'size' bytes.  return number of bytes actually read
+	void( *skip )  ( void *user, int n );                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
+	int( *eof )   ( void *user );                       // returns nonzero if we are at end of file/data
+} stbi_io_callbacks;
+
+// Route all stb-style allocations through the nv allocator wrappers.
+#define STBI_MALLOC(sz)    nvmalloc(sz)
+#define STBI_REALLOC(p,sz) nvrealloc(p,sz)
+#define STBI_FREE(p)       nvfree(p)
+
+// Thin wrapper so call sites read like upstream stb_image code.
+static void *stbi__malloc( size_t size )
+{
+	return STBI_MALLOC( size );
+}
+
+
+// Truncate an integer expression to its low 8 bits.
+template < typename T >
+inline uchar8 byte_cast( T x )
+{
+	return uchar8( (x)& 255 );
+}
+
+// Error reporting is compiled out in this port: stbi__err always evaluates to 0
+// ("failure"), and stbi__errpuc to the corresponding NULL pointer.
+#define stbi__err(x,y)  0
+#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
+
+
+// Integer luma approximation: y = (77*r + 150*g + 29*b) / 256.
+static uchar8 stbi__compute_y( int r, int g, int b )
+{
+	return (uchar8)( ( ( r * 77 ) + ( g * 150 ) + ( 29 * b ) ) >> 8 );
+}
+
+// Convert an 8-bit image from img_n to req_comp channels per pixel. Allocates a
+// new buffer, frees the input `data`, and returns the new buffer (or `data`
+// unchanged when no conversion is needed; NULL on allocation failure).
+// req_comp must be in [1,4].
+static unsigned char *stbi__convert_format( unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y )
+{
+	int i, j;
+	unsigned char *good;
+
+	if ( req_comp == img_n ) return data;
+	NV_ASSERT( req_comp >= 1 && req_comp <= 4, "!" );
+
+	// NOTE(review): req_comp * x * y is an unsigned-int-width product — may
+	// overflow for very large images; confirm callers bound the dimensions.
+	good = (unsigned char *)stbi__malloc( req_comp * x * y );
+	if ( good == NULL )
+	{
+		STBI_FREE( data );
+		return stbi__errpuc( "outofmem", "Out of memory" );
+	}
+
+	for ( j = 0; j < (int)y; ++j )
+	{
+		unsigned char *src = data + j * x * img_n;
+		unsigned char *dest = good + j * x * req_comp;
+
+#define COMBO(a,b)  ((a)*8+(b))
+#define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+		// convert source image with img_n components to one with req_comp components;
+		// avoid switch per pixel, so use switch per scanline and massive macros
+		switch ( COMBO( img_n, req_comp ) )
+		{
+			CASE( 1, 2 ) dest[0] = src[0], dest[1] = 255; break;
+			CASE( 1, 3 ) dest[0] = dest[1] = dest[2] = src[0]; break;
+			CASE( 1, 4 ) dest[0] = dest[1] = dest[2] = src[0], dest[3] = 255; break;
+			CASE( 2, 1 ) dest[0] = src[0]; break;
+			CASE( 2, 3 ) dest[0] = dest[1] = dest[2] = src[0]; break;
+			CASE( 2, 4 ) dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1]; break;
+			CASE( 3, 4 ) dest[0] = src[0], dest[1] = src[1], dest[2] = src[2], dest[3] = 255; break;
+			CASE( 3, 1 ) dest[0] = stbi__compute_y( src[0], src[1], src[2] ); break;
+			CASE( 3, 2 ) dest[0] = stbi__compute_y( src[0], src[1], src[2] ), dest[1] = 255; break;
+			CASE( 4, 1 ) dest[0] = stbi__compute_y( src[0], src[1], src[2] ); break;
+			CASE( 4, 2 ) dest[0] = stbi__compute_y( src[0], src[1], src[2] ), dest[1] = src[3]; break;
+			CASE( 4, 3 ) dest[0] = src[0], dest[1] = src[1], dest[2] = src[2]; break;
+		default: NV_ASSERT( 0, "!" );
+		}
+#undef CASE
+	}
+
+	STBI_FREE( data );
+	return good;
+}
+
+
+// Decoder input abstraction: reads either from a caller-supplied memory buffer
+// or, via stbi_io_callbacks, from a user stream buffered through the 128-byte
+// m_buffer_start window. Also carries the image dimensions/channel counts the
+// format parsers fill in.
+struct stbi__context
+{
+	uint32 img_x, img_y;
+	int img_n, img_out_n;
+
+	void rewind()
+	{
+		// conceptually rewind SHOULD rewind to the beginning of the stream,
+		// but we just rewind to the beginning of the initial buffer, because
+		// we only use it after doing 'test', which only ever looks at at most 92 bytes
+		m_img_buffer     = m_img_buffer_original;
+		m_img_buffer_end = m_img_buffer_original_end;
+	}
+
+	// initialize a memory-decode context
+	stbi__context( const uchar8* buffer, int len )
+	{
+		m_io.read = NULL;
+		m_read_from_callbacks = 0;
+		m_img_buffer          = m_img_buffer_original     = (uchar8 *)buffer;
+		m_img_buffer_end      = m_img_buffer_original_end = (uchar8 *)buffer + len;
+	}
+
+	// initialize a callback-based context
+	stbi__context( stbi_io_callbacks *c, void *user )
+	{
+		m_io = *c;
+		m_io_user_data = user;
+		m_buflen = sizeof( m_buffer_start );
+		m_read_from_callbacks = 1;
+		m_img_buffer_original = m_buffer_start;
+		refill_buffer();
+		m_img_buffer_original_end = m_img_buffer_end;
+	}
+
+	// Pull the next chunk of the stream into m_buffer_start via the read callback.
+	void refill_buffer()
+	{
+		int n = ( m_io.read )( m_io_user_data, (char*)m_buffer_start, m_buflen );
+		if ( n == 0 )
+		{
+			// at end of file, treat same as if from memory, but need to handle case
+			// where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
+			m_read_from_callbacks = 0;
+			m_img_buffer = m_buffer_start;
+			m_img_buffer_end = m_buffer_start + 1;
+			*m_img_buffer = 0;
+		}
+		else
+		{
+			m_img_buffer = m_buffer_start;
+			m_img_buffer_end = m_buffer_start + n;
+		}
+	}
+
+	// Read one byte; returns 0 once past the end of the data.
+	uchar8 get8()
+	{
+		if ( m_img_buffer < m_img_buffer_end )
+			return *m_img_buffer++;
+		if ( m_read_from_callbacks )
+		{
+			refill_buffer();
+			return *m_img_buffer++;
+		}
+		return 0;
+	}
+
+	// Nonzero when no more bytes can be produced.
+	inline int at_eof()
+	{
+		if ( m_io.read )
+		{
+			if ( !( m_io.eof )( m_io_user_data ) ) return 0;
+			// if feof() is true, check if buffer = end
+			// special case: we've only got the special 0 character at the end
+			if ( m_read_from_callbacks == 0 ) return 1;
+		}
+
+		return m_img_buffer >= m_img_buffer_end;
+	}
+
+	// Advance n bytes in the stream.
+	// NOTE(review): a negative n jumps to the end of the current buffer here,
+	// while the stbi_io_callbacks comment describes negative skip as 'unget' —
+	// confirm no caller relies on ungetting.
+	void skip( int n )
+	{
+		if ( n < 0 )
+		{
+			m_img_buffer = m_img_buffer_end;
+			return;
+		}
+		if ( m_io.read )
+		{
+			int blen = (int)( m_img_buffer_end - m_img_buffer );
+			if ( blen < n )
+			{
+				// Skip past the buffered portion, then let the callback skip the rest.
+				m_img_buffer = m_img_buffer_end;
+				( m_io.skip )( m_io_user_data, n - blen );
+				return;
+			}
+		}
+		m_img_buffer += n;
+	}
+
+	// Read exactly n bytes into buffer; returns 1 on success, 0 on short read.
+	int getn( uchar8 *buffer, int n )
+	{
+		if ( m_io.read )
+		{
+			int blen = (int)( m_img_buffer_end - m_img_buffer );
+			if ( blen < n )
+			{
+				int res, count;
+
+				// Take what is buffered, then read the remainder straight from the callback.
+				nvmemcpy( buffer, m_img_buffer, blen );
+
+				count = ( m_io.read )( m_io_user_data, (char*)buffer + blen, n - blen );
+				res = ( count == ( n - blen ) );
+				m_img_buffer = m_img_buffer_end;
+				return res;
+			}
+		}
+
+		if ( m_img_buffer + n <= m_img_buffer_end )
+		{
+			nvmemcpy( buffer, m_img_buffer, n );
+			m_img_buffer += n;
+			return 1;
+		}
+		else
+			return 0;
+	}
+
+	// Big-endian (network order) 16-bit read, as PNG uses.
+	inline int get16be()
+	{
+		int z = get8();
+		return ( z << 8 ) + get8();
+	}
+
+	// Big-endian 32-bit read built from two 16-bit reads.
+	inline uint32 get32be()
+	{
+		uint32 z = get16be();
+		return ( z << 16 ) + get16be();
+	}
+
+	// Bytes left in the current in-memory window (excludes unread callback data).
+	int remaining()
+	{
+		return m_img_buffer_end - m_img_buffer;
+	}
+
+private:
+	stbi_io_callbacks m_io;
+	void* m_io_user_data;
+
+	int m_read_from_callbacks;
+	int m_buflen;
+	uchar8 m_buffer_start[128];
+
+	uchar8 *m_img_buffer, *m_img_buffer_end;
+	uchar8 *m_img_buffer_original, *m_img_buffer_original_end;
+
+
+};
+
+
+// What a parse pass should produce: full pixel load, type probe, or header info only.
+enum
+{
+	STBI__SCAN_load = 0,
+	STBI__SCAN_type,
+	STBI__SCAN_header
+};
+
+// PNG decoder entry points (defined further below).
+static int      stbi__png_test( stbi__context *s );
+static uchar8 *stbi__png_load( stbi__context *s, int *x, int *y, int *comp, int req_comp );
+static int      stbi__png_info( stbi__context *s, int *x, int *y, int *comp );
+
+// A PNG chunk header: payload byte length followed by the 4-CC chunk type.
+typedef struct
+{
+	uint32 length;
+	uint32 type;
+} stbi__pngchunk;
+
+// Read a chunk header from the stream: two big-endian 32-bit words (length, type).
+static stbi__pngchunk stbi__get_chunk_header( stbi__context *s )
+{
+	stbi__pngchunk c;
+	c.length = s->get32be();
+	c.type = s->get32be();
+	return c;
+}
+
+// Verify the fixed 8-byte PNG file signature; returns 1 on match, 0 otherwise
+// (stbi__err compiles to 0 in this build).
+static int stbi__check_png_header( stbi__context *s )
+{
+	static uchar8 png_sig[8] = { 137,80,78,71,13,10,26,10 };
+	int i;
+	for ( i = 0; i < 8; ++i )
+		if ( s->get8() != png_sig[i] ) return stbi__err( "bad png sig", "Not a PNG" );
+	return 1;
+}
+
+// PNG decoder scratch state: the source context plus three heap buffers used
+// during decoding (out holds the final pixel data; idata/expanded presumably
+// hold the compressed and inflated chunk data — named per their use).
+typedef struct
+{
+	stbi__context *s;
+	uchar8 *idata, *expanded, *out;
+} stbi__png;
+
+
+// PNG scanline filter types (0-4 per the PNG spec), plus two synthetic variants
+// applied on the first row, where there is no prior scanline to reference.
+enum
+{
+	STBI__F_none = 0,
+	STBI__F_sub = 1,
+	STBI__F_up = 2,
+	STBI__F_avg = 3,
+	STBI__F_paeth = 4,
+	// synthetic filters used for first scanline to avoid needing a dummy row of 0s
+	STBI__F_avg_first,
+	STBI__F_paeth_first
+};
+
+// Remap each filter to its first-row variant ("up" degenerates to "none", etc.).
+static uchar8 first_row_filter[5] =
+{
+	STBI__F_none,
+	STBI__F_sub,
+	STBI__F_none,
+	STBI__F_avg_first,
+	STBI__F_paeth_first
+};
+
+// Paeth predictor from the PNG spec: pick whichever of a (left), b (above),
+// c (upper-left) is closest to p = a + b - c, preferring a, then b.
+static int stbi__paeth( int a, int b, int c )
+{
+	int p = a + b - c;
+	int pa = abs( p - a );
+	int pb = abs( p - b );
+	int pc = abs( p - c );
+	if ( pa <= pb && pa <= pc ) return a;
+	if ( pb <= pc ) return b;
+	return c;
+}
+
+// Multipliers that expand a 1/2/4/8-bit grayscale sample to the full 0..255
+// range (indexed by bit depth; unused depths are 0).
+static uchar8 stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
+
+// create the png data from post-deflated data
+static int stbi__create_png_image_raw( stbi__png *a, uchar8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y, int depth, int color )
+{
+	stbi__context *s = a->s;
+	uint32 i, j, stride = x*out_n;
+	uint32 img_len, img_width_bytes;
+	int k;
+	int img_n = s->img_n; // copy it into a local for later
+
+	NV_ASSERT( out_n == s->img_n || out_n == s->img_n + 1, "!" );
+	a->out = (uchar8 *)stbi__malloc( x * y * out_n ); // extra bytes to write off the end into
+	if ( !a->out ) return stbi__err( "outofmem", "Out of memory" );
+
+	img_width_bytes = ( ( ( img_n * x * depth ) + 7 ) >> 3 );
+	img_len = ( img_width_bytes + 1 ) * y;
+	if ( s->img_x == x && s->img_y == y )
+	{
+		if ( raw_len != img_len ) return stbi__err( "not enough pixels", "Corrupt PNG" );
+	}
+	else
+	{ // interlaced:
+		if ( raw_len < img_len ) return stbi__err( "not enough pixels", "Corrupt PNG" );
+	}
+
+	for ( j = 0; j < y; ++j )
+	{
+		uchar8 *cur = a->out + stride*j;
+		uchar8 *prior = cur - stride;
+		int filter = *raw++;
+		int filter_bytes = img_n;
+		int width = x;
+		if ( filter > 4 )
+			return stbi__err( "invalid filter", "Corrupt PNG" );
+
+		if ( depth < 8 )
+		{
+			NV_ASSERT( img_width_bytes <= x, "!" );
+			cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
+			filter_bytes = 1;
+			width = img_width_bytes;
+		}
+
+		// if first row, use special filter that doesn't sample previous row
+		if ( j == 0 ) filter = first_row_filter[filter];
+
+		// handle first byte explicitly
+		for ( k = 0; k < filter_bytes; ++k )
+		{
+			switch ( filter )
+			{
+			case STBI__F_none: cur[k] = raw[k]; break;
+			case STBI__F_sub: cur[k] = raw[k]; break;
+			case STBI__F_up: cur[k] = byte_cast( raw[k] + prior[k] ); break;
+			case STBI__F_avg: cur[k] = byte_cast( raw[k] + ( prior[k] >> 1 ) ); break;
+			case STBI__F_paeth: cur[k] = byte_cast( raw[k] + stbi__paeth( 0, prior[k], 0 ) ); break;
+			case STBI__F_avg_first: cur[k] = raw[k]; break;
+			case STBI__F_paeth_first: cur[k] = raw[k]; break;
+			}
+		}
+
+		if ( depth == 8 )
+		{
+			if ( img_n != out_n )
+				cur[img_n] = 255; // first pixel
+			raw += img_n;
+			cur += out_n;
+			prior += out_n;
+		}
+		else
+		{
+			raw += 1;
+			cur += 1;
+			prior += 1;
+		}
+
+		// this is a little gross, so that we don't switch per-pixel or per-component
+		if ( depth < 8 || img_n == out_n )
+		{
+			int nk = ( width - 1 )*img_n;
+#define CASE(f) \
+             case f:     \
+                for (k=0; k < nk; ++k)
+			switch ( filter )
+			{
+				// "none" filter turns into a memcpy here; make that explicit.
+			case STBI__F_none:         nvmemcpy( cur, raw, nk ); break;
+				CASE( STBI__F_sub )          cur[k] = byte_cast( raw[k] + cur[k - filter_bytes] ); break;
+				CASE( STBI__F_up )           cur[k] = byte_cast( raw[k] + prior[k] ); break;
+				CASE( STBI__F_avg )          cur[k] = byte_cast( raw[k] + ( ( prior[k] + cur[k - filter_bytes] ) >> 1 ) ); break;
+				CASE( STBI__F_paeth )        cur[k] = byte_cast( raw[k] + stbi__paeth( cur[k - filter_bytes], prior[k], prior[k - filter_bytes] ) ); break;
+				CASE( STBI__F_avg_first )    cur[k] = byte_cast( raw[k] + ( cur[k - filter_bytes] >> 1 ) ); break;
+				CASE( STBI__F_paeth_first )  cur[k] = byte_cast( raw[k] + stbi__paeth( cur[k - filter_bytes], 0, 0 ) ); break;
+			}
+#undef CASE
+			raw += nk;
+		}
+		else
+		{
+			NV_ASSERT( img_n + 1 == out_n, "!" );
+#define CASE(f) \
+             case f:     \
+                for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
+                   for (k=0; k < img_n; ++k)
+			switch ( filter )
+			{
+				CASE( STBI__F_none )         cur[k] = raw[k]; break;
+				CASE( STBI__F_sub )          cur[k] = byte_cast( raw[k] + cur[k - out_n] ); break;
+				CASE( STBI__F_up )           cur[k] = byte_cast( raw[k] + prior[k] ); break;
+				CASE( STBI__F_avg )          cur[k] = byte_cast( raw[k] + ( ( prior[k] + cur[k - out_n] ) >> 1 ) ); break;
+				CASE( STBI__F_paeth )        cur[k] = byte_cast( raw[k] + stbi__paeth( cur[k - out_n], prior[k], prior[k - out_n] ) ); break;
+				CASE( STBI__F_avg_first )    cur[k] = byte_cast( raw[k] + ( cur[k - out_n] >> 1 ) ); break;
+				CASE( STBI__F_paeth_first )  cur[k] = byte_cast( raw[k] + stbi__paeth( cur[k - out_n], 0, 0 ) ); break;
+			}
+#undef CASE
+		}
+	}
+
+	// we make a separate pass to expand bits to pixels; for performance,
+	// this could run two scanlines behind the above code, so it won't
+	// intefere with filtering but will still be in the cache.
+	if ( depth < 8 )
+	{
+		for ( j = 0; j < y; ++j )
+		{
+			uchar8 *cur = a->out + stride*j;
+			uchar8 *in = a->out + stride*j + x*out_n - img_width_bytes;
+			// unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
+			// png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
+			uchar8 scale = ( color == 0 ) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+
+																				 // note that the final byte might overshoot and write more data than desired.
+																				 // we can allocate enough data that this never writes out of memory, but it
+																				 // could also overwrite the next scanline. can it overwrite non-empty data
+																				 // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
+																				 // so we need to explicitly clamp the final ones
+
+			if ( depth == 4 )
+			{
+				for ( k = x*img_n; k >= 2; k -= 2, ++in )
+				{
+					*cur++ = scale * ( ( *in >> 4 ) );
+					*cur++ = scale * ( ( *in ) & 0x0f );
+				}
+				if ( k > 0 ) *cur++ = scale * ( ( *in >> 4 ) );
+			}
+			else if ( depth == 2 )
+			{
+				for ( k = x*img_n; k >= 4; k -= 4, ++in )
+				{
+					*cur++ = scale * ( ( *in >> 6 ) );
+					*cur++ = scale * ( ( *in >> 4 ) & 0x03 );
+					*cur++ = scale * ( ( *in >> 2 ) & 0x03 );
+					*cur++ = scale * ( ( *in ) & 0x03 );
+				}
+				if ( k > 0 ) *cur++ = scale * ( ( *in >> 6 ) );
+				if ( k > 1 ) *cur++ = scale * ( ( *in >> 4 ) & 0x03 );
+				if ( k > 2 ) *cur++ = scale * ( ( *in >> 2 ) & 0x03 );
+			}
+			else if ( depth == 1 )
+			{
+				for ( k = x*img_n; k >= 8; k -= 8, ++in )
+				{
+					*cur++ = scale * ( ( *in >> 7 ) );
+					*cur++ = scale * ( ( *in >> 6 ) & 0x01 );
+					*cur++ = scale * ( ( *in >> 5 ) & 0x01 );
+					*cur++ = scale * ( ( *in >> 4 ) & 0x01 );
+					*cur++ = scale * ( ( *in >> 3 ) & 0x01 );
+					*cur++ = scale * ( ( *in >> 2 ) & 0x01 );
+					*cur++ = scale * ( ( *in >> 1 ) & 0x01 );
+					*cur++ = scale * ( ( *in ) & 0x01 );
+				}
+				if ( k > 0 ) *cur++ = scale * ( ( *in >> 7 ) );
+				if ( k > 1 ) *cur++ = scale * ( ( *in >> 6 ) & 0x01 );
+				if ( k > 2 ) *cur++ = scale * ( ( *in >> 5 ) & 0x01 );
+				if ( k > 3 ) *cur++ = scale * ( ( *in >> 4 ) & 0x01 );
+				if ( k > 4 ) *cur++ = scale * ( ( *in >> 3 ) & 0x01 );
+				if ( k > 5 ) *cur++ = scale * ( ( *in >> 2 ) & 0x01 );
+				if ( k > 6 ) *cur++ = scale * ( ( *in >> 1 ) & 0x01 );
+			}
+			if ( img_n != out_n )
+			{
+				int q;
+				// insert alpha = 255
+				cur = a->out + stride*j;
+				if ( img_n == 1 )
+				{
+					for ( q = x - 1; q >= 0; --q )
+					{
+						cur[q * 2 + 1] = 255;
+						cur[q * 2 + 0] = cur[q];
+					}
+				}
+				else
+				{
+					NV_ASSERT( img_n == 3, "!" );
+					for ( q = x - 1; q >= 0; --q )
+					{
+						cur[q * 4 + 3] = 255;
+						cur[q * 4 + 2] = cur[q * 3 + 2];
+						cur[q * 4 + 1] = cur[q * 3 + 1];
+						cur[q * 4 + 0] = cur[q * 3 + 0];
+					}
+				}
+			}
+		}
+	}
+
+	return 1;
+}
+
+// De-interlaces (Adam7) or directly decodes the filtered PNG scanline data
+// into a->out. Returns 1 on success, 0 on failure (with any temporary buffer
+// released). 'image_data' is the inflated IDAT stream.
+static int stbi__create_png_image( stbi__png *a, uchar8 *image_data, uint32 image_data_len, int out_n, int depth, int color, int interlaced )
+{
+	uchar8 *final;
+	int p;
+	if ( !interlaced )
+		return stbi__create_png_image_raw( a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color );
+
+	// de-interlacing: decode each of the seven Adam7 passes into a temporary
+	// buffer, then scatter its pixels to their final positions.
+	final = (uchar8 *)stbi__malloc( a->s->img_x * a->s->img_y * out_n );
+	if ( final == NULL ) return stbi__err( "outofmem", "Out of memory" ); // was missing: OOM would have been dereferenced below
+	// Adam7 pass geometry: origin and spacing of each pass within the image
+	// (loop-invariant, so hoisted out of the pass loop).
+	static const int xorig[] = { 0,4,0,2,0,1,0 };
+	static const int yorig[] = { 0,0,4,0,2,0,1 };
+	static const int xspc[] = { 8,8,4,4,2,2,1 };
+	static const int yspc[] = { 8,8,8,4,4,2,2 };
+	for ( p = 0; p < 7; ++p )
+	{
+		int i, j, x, y;
+		// dimensions of this pass; a pass may be empty for small images
+		x = ( a->s->img_x - xorig[p] + xspc[p] - 1 ) / xspc[p];
+		y = ( a->s->img_y - yorig[p] + yspc[p] - 1 ) / yspc[p];
+		if ( x && y )
+		{
+			// bytes consumed by this pass: one filter byte plus packed pixels per row
+			uint32 img_len = ( ( ( ( a->s->img_n * x * depth ) + 7 ) >> 3 ) + 1 ) * y;
+			if ( !stbi__create_png_image_raw( a, image_data, image_data_len, out_n, x, y, depth, color ) )
+			{
+				STBI_FREE( final );
+				return 0;
+			}
+			for ( j = 0; j < y; ++j )
+			{
+				for ( i = 0; i < x; ++i )
+				{
+					int out_y = j*yspc[p] + yorig[p];
+					int out_x = i*xspc[p] + xorig[p];
+					nvmemcpy( final + out_y*a->s->img_x*out_n + out_x*out_n,
+						a->out + ( j*x + i )*out_n, out_n );
+				}
+			}
+			STBI_FREE( a->out );
+			image_data += img_len;
+			image_data_len -= img_len;
+		}
+	}
+	a->out = final;
+
+	return 1;
+}
+
+// Applies colour-keyed transparency from a tRNS chunk: pixels matching the
+// key colour tc[] get alpha 0; all other pixels keep the alpha of 255 that
+// was written during decoding. Always returns 1.
+static int stbi__compute_transparency( stbi__png *z, uchar8 tc[3], int out_n )
+{
+	stbi__context *s = z->s;
+	uint32 pixel_count = s->img_x * s->img_y;
+	uchar8 *px = z->out;
+
+	NV_ASSERT( out_n == 2 || out_n == 4, "!" );
+
+	if ( out_n == 2 )
+	{
+		// grayscale + alpha: key on the single gray component
+		for ( uint32 n = 0; n < pixel_count; ++n, px += 2 )
+			px[1] = ( px[0] == tc[0] ) ? 0 : 255;
+	}
+	else
+	{
+		// RGBA: all three colour components must match the key
+		for ( uint32 n = 0; n < pixel_count; ++n, px += 4 )
+		{
+			if ( px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2] )
+				px[3] = 0;
+		}
+	}
+	return 1;
+}
+
+// Replaces the 1-byte-per-pixel indexed image in a->out with an expanded
+// pal_img_n-component image looked up from 'palette' (stored as RGBA quads).
+// Returns 1 on success, 0 (via stbi__err) on allocation failure.
+static int stbi__expand_png_palette( stbi__png *a, uchar8 *palette, int len, int pal_img_n )
+{
+	uint32 pixel_count = a->s->img_x * a->s->img_y;
+	uchar8 *indexed = a->out;
+
+	uchar8 *expanded = (uchar8 *)stbi__malloc( pixel_count * pal_img_n );
+	if ( expanded == NULL ) return stbi__err( "outofmem", "Out of memory" );
+
+	uchar8 *dst = expanded;
+	if ( pal_img_n == 3 )
+	{
+		for ( uint32 i = 0; i < pixel_count; ++i, dst += 3 )
+		{
+			const uchar8 *entry = palette + indexed[i] * 4;
+			dst[0] = entry[0];
+			dst[1] = entry[1];
+			dst[2] = entry[2];
+		}
+	}
+	else
+	{
+		for ( uint32 i = 0; i < pixel_count; ++i, dst += 4 )
+		{
+			const uchar8 *entry = palette + indexed[i] * 4;
+			dst[0] = entry[0];
+			dst[1] = entry[1];
+			dst[2] = entry[2];
+			dst[3] = entry[3];
+		}
+	}
+	// swap the indexed buffer for the expanded one
+	STBI_FREE( a->out );
+	a->out = expanded;
+
+	NV_UNUSED( len );
+
+	return 1;
+}
+
+// Decode-time options for iPhone-flavoured (CgBI) PNGs; consumed by
+// stbi__de_iphone and stbi__parse_png_file below.
+static int stbi__unpremultiply_on_load = 0;
+static int stbi__de_iphone_flag = 0;
+
+// Enables undoing premultiplied alpha when converting iPhone (CgBI) PNGs.
+static void stbi_set_unpremultiply_on_load( int flag_true_if_should_unpremultiply )
+{
+	stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
+}
+
+// Enables BGR -> RGB conversion for iPhone (CgBI) PNGs (see stbi__de_iphone).
+static void stbi_convert_iphone_png_to_rgb( int flag_true_if_should_convert )
+{
+	stbi__de_iphone_flag = flag_true_if_should_convert;
+}
+
+// Post-processes an iPhone (CgBI) image in place: swaps the BGR byte order
+// back to RGB and, if stbi__unpremultiply_on_load is set, divides the colour
+// channels by alpha. Only 3- and 4-component output is expected.
+static void stbi__de_iphone( stbi__png *z )
+{
+	stbi__context *s = z->s;
+	uint32 pixel_count = s->img_x * s->img_y;
+	uchar8 *px = z->out;
+
+	if ( s->img_out_n == 3 )
+	{
+		// three components: plain BGR -> RGB swap
+		for ( uint32 n = 0; n < pixel_count; ++n, px += 3 )
+		{
+			uchar8 tmp = px[0];
+			px[0] = px[2];
+			px[2] = tmp;
+		}
+		return;
+	}
+
+	NV_ASSERT( s->img_out_n == 4, "!" );
+
+	if ( !stbi__unpremultiply_on_load )
+	{
+		// four components, premultiplication left alone: swap only
+		for ( uint32 n = 0; n < pixel_count; ++n, px += 4 )
+		{
+			uchar8 tmp = px[0];
+			px[0] = px[2];
+			px[2] = tmp;
+		}
+		return;
+	}
+
+	// four components: swap BGR -> RGB and undo the premultiplied alpha
+	for ( uint32 n = 0; n < pixel_count; ++n, px += 4 )
+	{
+		uchar8 alpha = px[3];
+		uchar8 blue = px[0];
+		if ( alpha )
+		{
+			px[0] = px[2] * 255 / alpha;
+			px[1] = px[1] * 255 / alpha;
+			px[2] = blue * 255 / alpha;
+		}
+		else
+		{
+			// alpha == 0: nothing to unpremultiply, swap only
+			px[0] = px[2];
+			px[2] = blue;
+		}
+	}
+}
+
+// Packs four chunk-name characters into a big-endian 32-bit PNG chunk tag.
+#define STBI__PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
+
+// Walks the PNG chunk stream. 'scan' controls depth: STBI__SCAN_type only
+// verifies the signature, STBI__SCAN_header stops once dimensions/components
+// are known, STBI__SCAN_load decodes the whole image into z->out. Returns 1
+// on success, 0 (via stbi__err) on failure. NOTE(review): per-chunk CRC
+// words are read and skipped, not validated.
+static int stbi__parse_png_file( stbi__png *z, int scan, int req_comp )
+{
+	uchar8 palette[1024], pal_img_n = 0; // palette stored as 256 RGBA quads
+	uchar8 has_trans = 0, tc[3];
+	uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
+	int first = 1, k, interlace = 0, color = 0, depth = 0, is_iphone = 0;
+	stbi__context *s = z->s;
+
+	z->expanded = NULL;
+	z->idata = NULL;
+	z->out = NULL;
+
+	if ( !stbi__check_png_header( s ) ) return 0;
+
+	if ( scan == STBI__SCAN_type ) return 1;
+
+	// iterate over chunks; IEND (or an error) exits the loop
+	for ( ;;)
+	{
+		stbi__pngchunk c = stbi__get_chunk_header( s );
+		switch ( c.type )
+		{
+		case STBI__PNG_TYPE( 'C', 'g', 'B', 'I' ):
+			// Apple CgBI extension: marks raw-deflate IDAT and BGR byte order
+			is_iphone = 1;
+			s->skip( c.length );
+			break;
+		case STBI__PNG_TYPE( 'I', 'H', 'D', 'R' ): {
+			int comp, filter;
+			if ( !first ) return stbi__err( "multiple IHDR", "Corrupt PNG" );
+			first = 0;
+			if ( c.length != 13 ) return stbi__err( "bad IHDR len", "Corrupt PNG" );
+			s->img_x = s->get32be(); if ( s->img_x > ( 1 << 24 ) ) return stbi__err( "too large", "Very large image (corrupt?)" );
+			s->img_y = s->get32be(); if ( s->img_y > ( 1 << 24 ) ) return stbi__err( "too large", "Very large image (corrupt?)" );
+			depth = s->get8();  if ( depth != 1 && depth != 2 && depth != 4 && depth != 8 )  return stbi__err( "1/2/4/8-bit only", "PNG not supported: 1/2/4/8-bit only" );
+			color = s->get8();  if ( color > 6 )         return stbi__err( "bad ctype", "Corrupt PNG" );
+			if ( color == 3 ) pal_img_n = 3; else if ( color & 1 ) return stbi__err( "bad ctype", "Corrupt PNG" );
+			comp = s->get8();  if ( comp ) return stbi__err( "bad comp method", "Corrupt PNG" );
+			filter = s->get8();  if ( filter ) return stbi__err( "bad filter method", "Corrupt PNG" );
+			interlace = s->get8(); if ( interlace > 1 ) return stbi__err( "bad interlace method", "Corrupt PNG" );
+			if ( !s->img_x || !s->img_y ) return stbi__err( "0-pixel image", "Corrupt PNG" );
+			if ( !pal_img_n )
+			{
+				// component count from colour-type bits: 2 = colour, 4 = alpha
+				s->img_n = ( color & 2 ? 3 : 1 ) + ( color & 4 ? 1 : 0 );
+				if ( ( 1 << 30 ) / s->img_x / s->img_n < s->img_y ) return stbi__err( "too large", "Image too large to decode" );
+				if ( scan == STBI__SCAN_header ) return 1;
+			}
+			else
+			{
+				// if paletted, then pal_n is our final components, and
+				// img_n is # components to decompress/filter.
+				s->img_n = 1;
+				if ( ( 1 << 30 ) / s->img_x / 4 < s->img_y ) return stbi__err( "too large", "Corrupt PNG" );
+				// if SCAN_header, have to scan to see if we have a tRNS
+			}
+			break;
+		}
+
+		case STBI__PNG_TYPE( 'P', 'L', 'T', 'E' ): {
+			if ( first ) return stbi__err( "first not IHDR", "Corrupt PNG" );
+			if ( c.length > 256 * 3 ) return stbi__err( "invalid PLTE", "Corrupt PNG" );
+			pal_len = c.length / 3;
+			if ( pal_len * 3 != c.length ) return stbi__err( "invalid PLTE", "Corrupt PNG" );
+			// store RGB entries as opaque RGBA; tRNS may overwrite alpha later
+			for ( i = 0; i < pal_len; ++i )
+			{
+				palette[i * 4 + 0] = s->get8();
+				palette[i * 4 + 1] = s->get8();
+				palette[i * 4 + 2] = s->get8();
+				palette[i * 4 + 3] = 255;
+			}
+			break;
+		}
+
+		case STBI__PNG_TYPE( 't', 'R', 'N', 'S' ): {
+			if ( first ) return stbi__err( "first not IHDR", "Corrupt PNG" );
+			if ( z->idata ) return stbi__err( "tRNS after IDAT", "Corrupt PNG" );
+			if ( pal_img_n )
+			{
+				// paletted: tRNS supplies per-entry alpha values
+				if ( scan == STBI__SCAN_header ) { s->img_n = 4; return 1; }
+				if ( pal_len == 0 ) return stbi__err( "tRNS before PLTE", "Corrupt PNG" );
+				if ( c.length > pal_len ) return stbi__err( "bad tRNS len", "Corrupt PNG" );
+				pal_img_n = 4;
+				for ( i = 0; i < c.length; ++i )
+					palette[i * 4 + 3] = s->get8();
+			}
+			else
+			{
+				// non-paletted: tRNS supplies a single colour key per component
+				if ( !( s->img_n & 1 ) ) return stbi__err( "tRNS with alpha", "Corrupt PNG" );
+				if ( c.length != (uint32)s->img_n * 2 ) return stbi__err( "bad tRNS len", "Corrupt PNG" );
+				has_trans = 1;
+				for ( k = 0; k < s->img_n; ++k )
+					tc[k] = (uchar8)( s->get16be() & 255 ) * stbi__depth_scale_table[depth]; // non 8-bit images will be larger
+			}
+			break;
+		}
+
+		case STBI__PNG_TYPE( 'I', 'D', 'A', 'T' ): {
+			if ( first ) return stbi__err( "first not IHDR", "Corrupt PNG" );
+			if ( pal_img_n && !pal_len ) return stbi__err( "no PLTE", "Corrupt PNG" );
+			if ( scan == STBI__SCAN_header ) { s->img_n = pal_img_n; return 1; }
+			// reject offset overflow before accumulating
+			if ( (int)( ioff + c.length ) < (int)ioff ) return 0;
+			if ( ioff + c.length > idata_limit )
+			{
+				// grow the IDAT accumulation buffer geometrically
+				uchar8 *p;
+				if ( idata_limit == 0 ) idata_limit = c.length > 4096 ? c.length : 4096;
+				while ( ioff + c.length > idata_limit )
+					idata_limit *= 2;
+				p = (uchar8 *)STBI_REALLOC( z->idata, idata_limit ); if ( p == NULL ) return stbi__err( "outofmem", "Out of memory" );
+				z->idata = p;
+			}
+			if ( !s->getn( z->idata + ioff, c.length ) ) return stbi__err( "outofdata", "Corrupt PNG" );
+			ioff += c.length;
+			break;
+		}
+
+		case STBI__PNG_TYPE( 'I', 'E', 'N', 'D' ): {
+			uint32 raw_len, bpl;
+			if ( first ) return stbi__err( "first not IHDR", "Corrupt PNG" );
+			if ( scan != STBI__SCAN_load ) return 1;
+			if ( z->idata == NULL ) return stbi__err( "no IDAT", "Corrupt PNG" );
+			// initial guess for decoded data size to avoid unnecessary reallocs
+			bpl = ( s->img_x * depth + 7 ) / 8; // bytes per line, per component
+			raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
+			size_t result_len = raw_len;
+			// NOTE(review): '!is_iphone' presumably selects zlib-header parsing
+			// (CgBI streams are raw deflate) -- confirm against miniz_decompress.
+			z->expanded = (uchar8 *)nv::miniz_decompress( (char *)z->idata, ioff, &result_len, !is_iphone );
+			raw_len = result_len;
+
+			if ( z->expanded == NULL ) return 0; // zlib should set error
+			STBI_FREE( z->idata ); z->idata = NULL;
+			// decide output component count: add alpha if requested or keyed
+			if ( ( req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n ) || has_trans )
+				s->img_out_n = s->img_n + 1;
+			else
+				s->img_out_n = s->img_n;
+			if ( !stbi__create_png_image( z, z->expanded, raw_len, s->img_out_n, depth, color, interlace ) ) return 0;
+			if ( has_trans )
+				if ( !stbi__compute_transparency( z, tc, s->img_out_n ) ) return 0;
+			if ( is_iphone && stbi__de_iphone_flag && s->img_out_n > 2 )
+				stbi__de_iphone( z );
+			if ( pal_img_n )
+			{
+				// pal_img_n == 3 or 4
+				s->img_n = pal_img_n; // record the actual colors we had
+				s->img_out_n = pal_img_n;
+				if ( req_comp >= 3 ) s->img_out_n = req_comp;
+				if ( !stbi__expand_png_palette( z, palette, pal_len, s->img_out_n ) )
+					return 0;
+			}
+			STBI_FREE( z->expanded ); z->expanded = NULL;
+			return 1;
+		}
+
+		default:
+			// if critical, fail
+			// bit 29 is the 0x20 bit of the first type byte: lowercase first
+			// letter marks an ancillary chunk, which is safe to skip
+			if ( first ) return stbi__err( "first not IHDR", "Corrupt PNG" );
+			if ( ( c.type & ( 1 << 29 ) ) == 0 )
+			{
+				return stbi__err( invalid_chunk, "PNG not supported: unknown PNG chunk type" );
+			}
+			s->skip( c.length );
+			break;
+		}
+		// end of PNG chunk, read and skip CRC
+		s->get32be();
+	}
+}
+
+// Runs a full decode and hands ownership of the pixel buffer to the caller,
+// converting to req_comp components if requested. Intermediate buffers are
+// freed whatever the outcome. Returns NULL on failure. Note: *x and *y are
+// written unconditionally on success, so they must be non-NULL; only 'n' is
+// optional.
+static unsigned char *stbi__do_png( stbi__png *p, int *x, int *y, int *n, int req_comp )
+{
+	unsigned char *result = NULL;
+	if ( req_comp < 0 || req_comp > 4 ) return stbi__errpuc( "bad req_comp", "Internal error" );
+	if ( stbi__parse_png_file( p, STBI__SCAN_load, req_comp ) )
+	{
+		result = p->out;
+		p->out = NULL; // ownership transferred to 'result'
+		if ( req_comp && req_comp != p->s->img_out_n )
+		{
+			// NOTE(review): assumes stbi__convert_format releases its input on
+			// failure (as upstream stb_image does) -- confirm, else this leaks.
+			result = stbi__convert_format( result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y );
+			p->s->img_out_n = req_comp;
+			if ( result == NULL ) return result;
+		}
+		*x = p->s->img_x;
+		*y = p->s->img_y;
+		if ( n ) *n = p->s->img_out_n;
+	}
+	// on success these are already NULL; on failure they release partial state
+	STBI_FREE( p->out );      p->out = NULL;
+	STBI_FREE( p->expanded ); p->expanded = NULL;
+	STBI_FREE( p->idata );    p->idata = NULL;
+
+	return result;
+}
+
+// Thin wrapper: binds the context to a fresh png decoder state and decodes.
+static unsigned char *stbi__png_load( stbi__context *s, int *x, int *y, int *comp, int req_comp )
+{
+	stbi__png png;
+	png.s = s;
+	return stbi__do_png( &png, x, y, comp, req_comp );
+}
+
+// Checks the 8-byte PNG signature, then rewinds so a loader can re-read it.
+static int stbi__png_test( stbi__context *s )
+{
+	const int matched = stbi__check_png_header( s );
+	s->rewind();
+	return matched;
+}
+
+// Header-only parse: fills any non-NULL out-parameters with the image's
+// dimensions and component count. On failure the context is rewound and 0
+// is returned.
+static int stbi__png_info_raw( stbi__png *p, int *x, int *y, int *comp )
+{
+	if ( stbi__parse_png_file( p, STBI__SCAN_header, 0 ) )
+	{
+		if ( x ) *x = p->s->img_x;
+		if ( y ) *y = p->s->img_y;
+		if ( comp ) *comp = p->s->img_n;
+		return 1;
+	}
+	p->s->rewind();
+	return 0;
+}
+
+// Queries header info only, without decoding pixel data.
+static int stbi__png_info( stbi__context *s, int *x, int *y, int *comp )
+{
+	stbi__png png;
+	png.s = s;
+	return stbi__png_info_raw( &png, x, y, comp );
+}
+
+// stbi IO callback: reads up to 'size' bytes into 'data'. 'user' is the
+// nv::stream handed to the stbi__context; static_cast (not reinterpret_cast)
+// is the correct named cast back from void*.
+static int stbi__stream_read( void *user, char *data, int size )
+{
+	return static_cast<stream*>( user )->read( data, 1, size );
+}
+
+// stbi IO callback: advances the stream by 'n' bytes relative to the current
+// position. static_cast replaces reinterpret_cast for the void* round-trip.
+static void stbi__stream_skip( void *user, int n )
+{
+	static_cast<stream*>( user )->seek( n, origin::CUR );
+}
+
+// stbi IO callback: non-zero means end-of-stream. static_cast replaces
+// reinterpret_cast for the void* round-trip.
+static int stbi__stream_eof( void *user )
+{
+	return static_cast<stream*>( user )->eof();
+}
+
+// IO callback table bridging the stbi decoder to nv::stream (see the
+// stbi__stream_* adapters above).
+static stbi_io_callbacks stbi__callbacks =
+{
+	stbi__stream_read,
+	stbi__stream_skip,
+	stbi__stream_eof,
+};
+
+// Decodes a PNG from 'f'. Returns a heap-allocated pixel buffer owned by the
+// caller (NULL on failure); *x/*y/*comp receive dimensions and component
+// count.
+unsigned char * stbi_load( stream* f, int *x, int *y, int *comp, int req_comp )
+{
+	unsigned char *result;
+	stbi__context s( &stbi__callbacks, (void *)f );
+	result = stbi__png_load( &s, x, y, comp, req_comp );
+
+	if ( result )
+	{
+		// need to 'unget' all the characters in the IO buffer
+		// NOTE(review): assumes remaining() negates safely into the seek
+		// offset type (it looks unsigned) -- confirm.
+		f->seek( -s.remaining(), origin::CUR );
+	}
+	return result;
+}
+
+
+// Default constructor: the loader is stateless, nothing to initialize.
+png_loader::png_loader() {}
+
+// Probes the PNG header without consuming the stream: the stream position is
+// now restored whether or not the probe succeeds (previously a failed probe
+// left the stream mid-file). Returns true and fills 'format'/'size' on
+// success.
+bool nv::png_loader::get_info( stream& str, image_format& format, ivec2& size )
+{
+	size_t pos = str.tell();
+	stbi__context s( &stbi__callbacks, (void *)&str );
+	int x, y;
+	int comp;
+	bool ok = ( stbi__png_info( &s, &x, &y, &comp ) == 1 );
+	str.seek( pos, origin::SET );
+	if ( !ok ) return false;
+
+	format.type = UBYTE;
+	switch ( comp )
+	{
+	case 1: format.format = RED; break;
+	case 3: format.format = RGB; break;
+	case 4: format.format = RGBA; break;
+	// comp == 0 cannot describe an image; comp == 2 (gray + alpha) has no
+	// mapping here -- TODO confirm whether an RG format should be added.
+	default: return false;
+	}
+	size = ivec2( x, y );
+	return true;
+}
+
+// Decodes a PNG from 's' in its native component count. Returns a new
+// image_data owning the pixel buffer, or nullptr on failure.
+image_data* nv::png_loader::load( stream& s )
+{
+	int x, y;
+	int comp;
+
+	stbi__context ctx( &stbi__callbacks, (void *)&s );
+	unsigned char *result = stbi__png_load( &ctx, &x, &y, &comp, 0 );
+	if ( result == nullptr ) return nullptr;
+
+	// need to 'unget' all the characters in the IO buffer
+	s.seek( -ctx.remaining(), origin::CUR );
+
+	image_format format;
+	format.type = UBYTE;
+	switch ( comp )
+	{
+	case 1: format.format = RED; break;
+	case 3: format.format = RGB; break;
+	case 4: format.format = RGBA; break;
+	default:
+		// Unsupported component count (e.g. 2 = gray + alpha). Previously this
+		// path leaked 'result' and returned 'false' from a pointer-returning
+		// function.
+		STBI_FREE( result );
+		return nullptr;
+	}
+	return new image_data( format, ivec2( x, y ), result );
+}
+
+// Decodes a PNG from 's', asking the decoder to convert to the requested
+// (UBYTE) format. Returns a new image_data owning the pixel buffer, or
+// nullptr on failure.
+image_data* nv::png_loader::load( stream& s, image_format format )
+{
+	NV_ASSERT( format.type == UBYTE, "!" );
+	int rcomp = 0;
+	switch ( format.format )
+	{
+	case RED: rcomp = 1; break;
+	case RGB: rcomp = 3; break;
+	case RGBA: rcomp = 4; break;
+	default: NV_ASSERT( false, "bad format requested!" ); return nullptr;
+	}
+	int x, y;
+	int comp;
+	stbi__context ctx( &stbi__callbacks, (void *)&s );
+	unsigned char* result = stbi__png_load( &ctx, &x, &y, &comp, rcomp );
+	if ( result == nullptr ) return nullptr;
+
+	// need to 'unget' all the characters in the IO buffer
+	s.seek( -ctx.remaining(), origin::CUR );
+
+	image_format fmt;
+	fmt.type = UBYTE;
+	switch ( comp )
+	{
+	case 1: fmt.format = RED; break;
+	case 3: fmt.format = RGB; break;
+	case 4: fmt.format = RGBA; break;
+	default:
+		// Previously this fell through with fmt.format unset when NV_ASSERT is
+		// compiled out; fail cleanly instead of returning a garbage format.
+		NV_ASSERT( false, "UNKNOWN RESULT!" );
+		STBI_FREE( result );
+		return nullptr;
+	}
+	return new image_data( fmt, ivec2( x, y ), result );
+}
Index: trunk/src/io/c_stream.cc
===================================================================
--- trunk/src/io/c_stream.cc	(revision 483)
+++ trunk/src/io/c_stream.cc	(revision 484)
@@ -93,2 +93,10 @@
 	}
 }
+
+// Returns true when the underlying C file has hit end-of-file. A stream with
+// no open file is reported as exhausted; the original fell off the end of the
+// function in that case, which is undefined behavior.
+bool nv::c_stream::eof()
+{
+	if ( m_file != nullptr )
+	{
+		return ::feof( reinterpret_cast<FILE*>( m_file ) ) != 0;
+	}
+	return true;
+}
