图片文件头标识分析
一个图片文件的后缀名并不能说明这个图片的真正格式是什么,读取图片文件的文件头标识才可以获取图片的真实格式。可以用十六进制编辑器查看图片的文件头。
1.JPEG
- 文件头标识 (2 bytes): $ff, $d8 (SOI) (JPEG 文件标识)
- 文件结束标识 (2 bytes): $ff, $d9 (EOI)
2.TGA
- 未压缩的前5字节 00 00 02 00 00
- RLE压缩的前5字节 00 00 0A 00 00(图像类型为 10,十六进制为 0A)
3.PNG
- 文件头标识 (8 bytes) 89 50 4E 47 0D 0A 1A 0A
4.GIF
- 文件头标识 (6 bytes) 47 49 46 38 39(37) 61 G I F 8 9 (7) a
5.BMP
- 文件头标识 (2 bytes) 42 4D B M
6.PCX
- 文件头标识 (1 bytes) 0A
7.TIFF
- 文件头标识 (2 bytes) 4D 4D 或 49 49
8.ICO
- 文件头标识 (8 bytes) 00 00 01 00 01 00 20 20
9.CUR
- 文件头标识 (8 bytes) 00 00 02 00 01 00 20 20
10.IFF
- 文件头标识 (4 bytes) 46 4F 52 4D F O R M
11.ANI
- 文件头标识 (4 bytes) 52 49 46 46 R I F F
文件头标识比对
根据这些文件头标识的收集,我可以写一个识别图像格式的模块了。但是在写这个模块之前可以对收集到的文件头标识进行优化,使得程序中字符串比对次数尽量的少。
1.JPEG我们只需要比对文件头的$ff, $d8这两个字符,而不用读取最后的两个结束标识了
2.TGA,ICO,CUR只需比对第三个与第五个字符即可。
3.PNG 比对[89][50]这两个字符。
4.GIF 比对[47][49][46]与第五个字符。
模块代码如下:
' Enumerates the kinds of image formats.
Public Enum ImageForm
[BMP] = 0
[JPEG] = 1
[GIF87] = 2
[GIF89] = 3
[PNG] = 4
[TGA Normal] = 5 ' uncompressed TGA
[TGA RLE] = 6 ' TGA after RLE compression
[PCX] = 7
[TIFF] = 8
[ICO] = 9
[CUR] = 10
[IFF] = 11
[ANI] = 12
[Other] = 13
[FileError] = 14
End Enum
常用的图片格式有以下几种。
- PNG
- JPEG
- GIF
- WebP 是 Google 制造的一个图片格式,针对网络上的快速传输进行了优化
- TIFF/TIF 在数字影像、遥感、医学等领域中得到了广泛的应用。TIFF文件的后缀是.tif或者.tiff
- HEIC iOS11 后,苹果拍照图片的默认格式
- HEIF 用于存储动态图像
JPEG 二进制数据前两个字节数据为
Hex Signature
FF D8
PNG
Hex Signature
89 50 4E 47 0D 0A 1A 0A
GIF
Hex Signature
47 49 46 38 37 61 or
47 49 46 38 39 61
TIFF
Hex Signature
49 20 49 or
49 49 2A 00 or
4D 4D 00 2B or
4D 4D 00 2A
HEIC
Hex Signature
00
HEIF
Hex Signature
00
WEBP
Hex Signature
52
判断 Webp 为什么是截取 0-12 的长度?转换成 ASCII 之后判断的依据?
在 Google 官方介绍中找到了此图。说明的是:文件头的大小是 12 Bytes
WEBP的 header 中写明了 ASCII
是 RIFF
或者 WEBP
Google Developer: https://developers.google.com/speed/webp/docs/riff_container
代码
demo 代码
/// Image container formats that can be reported for a blob of bytes.
enum ImageFormat {
    /// None of the known formats was recognised.
    case Unknow
    case JPEG, PNG, GIF, TIFF
    case WebP
    case HEIC, HEIF
}
extension Data {
    /// Guesses the image container format from the leading bytes of the data.
    ///
    /// JPEG, PNG, GIF and TIFF are decided by the first byte alone. WebP
    /// additionally requires the `RIFF` / `WEBP` tags of the 12-byte RIFF
    /// header, and HEIC / HEIF are decided by the four-byte brand stored at
    /// offsets 8...11.
    ///
    /// - Returns: The detected format, or `.Unknow` when the data is empty or
    ///   matches none of the checks.
    func getImageFormat() -> ImageFormat {
        // Empty data has no signature byte to inspect.
        guard let firstByte = self.first else { return .Unknow }

        // prefix(_:) counts from startIndex, so this also works when `self`
        // is a slice whose indices do not start at zero.
        let header = self.prefix(12)

        switch firstByte {
        case 0xFF: return .JPEG
        case 0x89: return .PNG
        case 0x47: return .GIF
        case 0x49, 0x4D: return .TIFF
        case 0x52 where header.count == 12:
            // Compare the two tags byte-wise: bytes 4...7 hold a binary chunk
            // size and need not be decodable as ASCII text.
            if header.starts(with: "RIFF".utf8), header.suffix(4).elementsEqual("WEBP".utf8) {
                return .WebP
            }
        case 0x00 where header.count == 12:
            if let brand = String(data: header.suffix(4), encoding: .ascii) {
                let HEICBitMaps = Set(["heic", "heis", "heix", "hevc", "hevx"])
                if HEICBitMaps.contains(brand) {
                    return .HEIC
                }
                let HEIFBitMaps = Set(["mif1", "msf1"])
                if HEIFBitMaps.contains(brand) {
                    return .HEIF
                }
            }
        default: break
        }
        return .Unknow
    }
}
C++ 代码1
Image_file.cpp
#include "config.h"
#include "ImageFile.h"
#include <fstream>
#include <stdio.h>
#include <iostream>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <cstring>
namespace blink {
#define JPEG_FILE_TYPE 1
#define BMP_FILE_TYPE 2
#define PNG_FILE_TYPE 3
#define GIF_FILE_TYPE 4

/*
 * Identifies the image format from the file-header signature:
 *   jpg: FF D8 (followed by FF)
 *   bmp: 42 4D
 *   gif: 47 49 46 38
 *   png: 89 50 4E 47
 *
 * buf must point at the leading bytes of the file; up to four are inspected.
 * Returns one of the *_FILE_TYPE codes, or 0 when no signature matches.
 */
int check_fileType(const unsigned char *buf)
{
    /* The four signatures start with different bytes, so dispatch on the first. */
    switch (buf[0])
    {
    case 0xFF:
        return (buf[1] == 0xD8 && buf[2] == 0xFF) ? JPEG_FILE_TYPE : 0;
    case 0x42:
        return (buf[1] == 0x4D) ? BMP_FILE_TYPE : 0;
    case 0x47:
        return (buf[1] == 0x49 && buf[2] == 0x46 && buf[3] == 0x38) ? GIF_FILE_TYPE : 0;
    case 0x89:
        return (buf[1] == 0x50 && buf[2] == 0x4E && buf[3] == 0x47) ? PNG_FILE_TYPE : 0;
    default:
        return 0;
    }
}
/*
 * Combines four bytes, given most-significant first, into one signed 32-bit
 * value. The shifts are done on unsigned operands so that a top byte >= 0x80
 * does not overflow a signed int.
 */
static int imageFileJoinBytes(unsigned char b3, unsigned char b2, unsigned char b1, unsigned char b0)
{
    const unsigned int bits = (static_cast<unsigned int>(b3) << 24)
                            | (static_cast<unsigned int>(b2) << 16)
                            | (static_cast<unsigned int>(b1) << 8)
                            | static_cast<unsigned int>(b0);
    return static_cast<int>(bits);
}

/*
 * Loads the file at `path`, identifies its format from the header signature
 * and extracts the pixel width/height into mwidth / mheight.
 *
 * On failure mtype carries a diagnostic ("The file was not opened!" or
 * "read error!") and the dimensions stay 0. For an unrecognised format mtype
 * stays empty.
 */
ImageFile::ImageFile(const String& path)
{
    mpath = path;
    mwidth = 0;
    mheight = 0;
    mtype = "";
    /* Nothing long-lived to point at: the UTF-8 buffer is only used inside the
       fopen() expression below (presumably utf8() returns a temporary, in
       which case a stored pointer would dangle -- confirm). */
    src = NULL;

    /* Read-only access is enough; "rb+" would fail on write-protected files. */
    FILE *fp = fopen(path.utf8().data(), "rb");
    if (fp == NULL)
    {
        mtype = "The file was not opened!";
        return;
    }

    long size = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        size = ftell(fp);

    unsigned char *buff = NULL;
    bool loaded = (size >= 0) && (fseek(fp, 0, SEEK_SET) == 0);
    if (loaded && size > 0)
    {
        buff = static_cast<unsigned char*>(malloc(static_cast<size_t>(size)));
        loaded = (buff != NULL)
              && (fread(buff, 1, static_cast<size_t>(size), fp) == static_cast<size_t>(size));
    }
    fclose(fp);

    if (!loaded)
    {
        free(buff);
        mtype = "read error!";
        return;
    }

    /* check_fileType() inspects up to four bytes; shorter files cannot match. */
    const int type = (size >= 4) ? check_fileType(buff) : 0;
    long i = 0;

    /* Every scan below stops early enough that the highest index it reads
       (i + 8, i + 9 or i + 11) still lies inside the buffer. */
    switch (type)
    {
    case JPEG_FILE_TYPE:
        mtype = "jpg file!";
        for (i = 0; i + 8 < size; i++)
        {
            /* FF C0 marker: the code reads height at +5/+6 and width at +7/+8, high byte first. */
            if (buff[i] == 0xff && buff[i + 1] == 0xc0)
            {
                mwidth = (buff[i + 7] << 8) | buff[i + 8];
                mheight = (buff[i + 5] << 8) | buff[i + 6];
                break;
            }
        }
        break;
    case BMP_FILE_TYPE:
        mtype = "bmp file!";
        /* NOTE(review): locating the info header by searching for 28 00 is a
           heuristic inherited from the original code -- confirm it is intended. */
        for (i = 0; i + 11 < size; i++)
        {
            if (buff[i] == 0x28 && buff[i + 1] == 0x00)
            {
                mwidth = imageFileJoinBytes(buff[i + 7], buff[i + 6], buff[i + 5], buff[i + 4]);
                mheight = imageFileJoinBytes(buff[i + 11], buff[i + 10], buff[i + 9], buff[i + 8]);
                break;
            }
        }
        break;
    case PNG_FILE_TYPE:
        mtype = "png file!";
        for (i = 0; i + 11 < size; i++)
        {
            /* Bytes 49 48 ("IH"): width is read at +4..+7 and height at +8..+11, high byte first. */
            if (buff[i] == 0x49 && buff[i + 1] == 0x48)
            {
                mheight = imageFileJoinBytes(buff[i + 8], buff[i + 9], buff[i + 10], buff[i + 11]);
                mwidth = imageFileJoinBytes(buff[i + 4], buff[i + 5], buff[i + 6], buff[i + 7]);
                break;
            }
        }
        break;
    case GIF_FILE_TYPE:
        mtype = "gif file!";
        for (i = 0; i + 9 < size; i++)
        {
            /* Bytes 00 2C: width is read at +6/+7 and height at +8/+9, low byte first. */
            if (buff[i] == 0x00 && buff[i + 1] == 0x2c)
            {
                mwidth = (buff[i + 7] << 8) | buff[i + 6];
                mheight = (buff[i + 9] << 8) | buff[i + 8];
                break;
            }
        }
        break;
    default:
        break;
    }

    free(buff);
}
/* Returns the format description (or error text) recorded in mtype. */
String ImageFile::type() const
{
    return this->mtype;
}
/*
 * Returns the leading part of mpath up to and including the '/' that is
 * found after stepping back over trailing slashes, or an empty string when
 * no such '/' exists.
 */
String ImageFile::location() const
{
    int remaining = mpath.length();
    int slash = mpath.reverseFind('/');

    /* While the slash found is the last character still under consideration,
       look for an earlier one and shrink the considered length. */
    for (; slash == remaining - 1; --remaining)
        slash = mpath.reverseFind('/', slash - 1);

    if (slash >= 0)
        return mpath.substring(0, slash + 1);
    return "";
}
/*
 * Returns the last path component of mpath, without trailing slashes.
 * Yields an empty string when mpath contains no '/' before that component.
 */
String ImageFile::fileName() const
{
    int length = mpath.length();
    int pos = mpath.reverseFind('/');

    /* Step back over trailing slashes so that `length` ends at the last
       character of the file name. */
    while (pos == length - 1)
    {
        pos = mpath.reverseFind('/', pos - 1);
        length--;
    }
    if (pos < 0)
    {
        return "";
    }

    /* Pass a character count, not an end index: assumes String::substring
       takes (start, length), as WTF::String does -- TODO confirm. With the
       end index, trailing slashes ended up in the returned name. */
    return mpath.substring(pos + 1, length - pos - 1);
}
double ImageFile::width() const
{
return mwidth;
}
double ImageFile::height() const
{
return mheight;
}
}
image_file.h
#ifndef ImageFile_H
#define ImageFile_H
namespace blink {
class ImageFile {
public:
static ImageFile* create(const String& path)
{
FILE* fS;
fS =fopen(path.utf8().data(),"r");
if(fS !=NULL)
{
int i;
int iLen = path.length() ;
int iPos = path.reverseFind('.');
if (iPos <= 0)
{
return NULL;
}
String name=path.substring(iPos + 1, iLen);
char s1[10];
char s2[]="jpg";
char s3[]="bmp";
char s4[]="gif";
char s5[]="png";
char s6[]="jpeg";
for(i=0;i<name.length();i++)
s1[i] = name[i];
s1[i] = '\0';
if(strncmp(s1,s2,3)==0 || strncmp(s1,s3,3)==0 || strncmp(s1,s4,3)==0 || strncmp(s1,s5,3)==0|| strncmp(s1,s6,4)==0)
return new NGBImageFile(path); //路径正确且图片文件格式是以上四种,创建文件对象
else
return NULL;
}
return NULL;
}
String type() const;
String location() const;
String fileName() const;
double width() const;
double height() const;
private:
ImageFile(const String& path);
char* src;
String mpath;
String mtype;
double mwidth;
double mheight;
};
} // namespace blink
#endif // ImageFile_H
Qt 代码
imageinfo.h
#ifndef IMAGEINFO_H
#define IMAGEINFO_H
#include <QObject>
#include <QDebug>
#include <QUrl>
#include <string>
#include <QSize>
#include <QDate>
// QObject exposing basic facts about an image file: format, size in bytes,
// pixel dimensions, modification date and title. The Q_INVOKABLE methods take
// a URL string, which the implementation converts to a local file path.
class ImageInfo : public QObject
{
Q_OBJECT
// Format codes returned by the private getImageFormat(std::string) overload.
enum IMAGE_FORMAT{
BMP_FORMAT,
JPG_FORMAT,
GIF_FORMAT,
PNG_FORMAT,
NVL_FORMAT
};
public:
explicit ImageInfo(QObject *parent = 0);
~ImageInfo();
public:
// Returns "BMP", "JPG", "GIF" or "PNG"; "NA" when the format is not recognised
// or the file does not exist.
Q_INVOKABLE QString getImageFormat(QString imageUrl);
// Returns the file size as a decimal string ("0" when unavailable).
Q_INVOKABLE QString getImageSize(QString imageUrl);
// Returns the pixel dimensions; a default-constructed QSize when unavailable.
Q_INVOKABLE QSize getImageDimension(QString imageUrl);
// Returns the date part of the file's last-modified time.
Q_INVOKABLE QDate getImageDate(QString imageUrl);
// Returns the file's base name.
Q_INVOKABLE QString getImageTitle(QString imageUrl);
Q_SIGNALS:
public Q_SLOTS :
private:
// Sniffs the header bytes of the file at `path`; returns an IMAGE_FORMAT value.
int getImageFormat(std::string path);
// Per-format size helpers operating on a local file path.
long getBMPSize(std::string path);
long getGIFSize(std::string path);
long getPNGSize(std::string path);
long getJPGSize(std::string path);
// Per-format dimension helpers operating on a local file path.
QSize getBMPDimension(std::string path);
QSize getPNGDimension(std::string path);
QSize getJPGDimension(std::string path);
QSize getGIFDimension(std::string path);
};
#endif // IMAGEINFO_H
imageinfo.cpp
#include "imageinfo.h"
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <QFile>
#include <QFileInfo>
// Constructs the object, forwarding `parent` to QObject, and logs a trace line.
ImageInfo::ImageInfo(QObject *parent)
    : QObject(parent)
{
    qDebug() << "---------------------------- image info constructed ";
}
// Nothing to release explicitly.
ImageInfo::~ImageInfo() {}
// Returns the date part of the last-modified timestamp of the local file that
// `imageUrl` refers to. Yields a default-constructed QDate when the URL is
// empty or the file does not exist.
QDate ImageInfo::getImageDate(QString imageUrl)
{
    if (imageUrl.isEmpty())
        return QDate();

    const QString localPath = QUrl(imageUrl).toLocalFile();
    if (!QFile::exists(localPath))
        return QDate();

    return QFileInfo(localPath).lastModified().date();
}
// Logs the first `count` signature bytes followed by a verdict line.
static void logImageSignature(const unsigned char *bytes, int count, const char *verdict)
{
    qDebug()<<"文件格式特征码:";
    for(int i=0; i<count; i++) {
        printf("%0x\t",bytes[i]);
    }
    printf("\n");
    qDebug()<<verdict;
}

// Determines the image format by reading the signature at the start of the file.
// For details see: http://www.garykessler.net/library/file_sigs.html
//
// Returns BMP_FORMAT, JPG_FORMAT, PNG_FORMAT or GIF_FORMAT on a match, and
// NVL_FORMAT when the file cannot be opened, is too short, or matches nothing.
int ImageInfo::getImageFormat(std::string path)
{
    // BMP signature
    static const unsigned char BMPHeader[] = {0x42, 0x4d};
    // JPG/JPEG signature: FF D8 followed by the FF that opens the next marker.
    // The fourth byte is that marker's code and varies between files, so it is
    // not compared.
    static const unsigned char JPGHeader[] = {0xff, 0xd8, 0xff};
    // GIF signatures
    static const unsigned char GIFHeader1[] = {0x47, 0x49, 0x46, 0x38, 0x37, 0x61};
    static const unsigned char GIFHeader2[] = {0x47, 0x49, 0x46, 0x38, 0x39, 0x61};
    // PNG signature
    static const unsigned char PNGHeader[] = {0x89, 0x50, 0x4E, 0x47};

    qDebug()<<"文件路径: "<<path.c_str();
    std::ifstream readf(path.c_str(), std::ios::binary);
    if(!readf.is_open()) {
        qDebug()<<"打开文件失败";
        return NVL_FORMAT;
    }

    // Unformatted read: operator>> would skip bytes that happen to be
    // whitespace characters. gcount() reports how many bytes really arrived.
    unsigned char header[6] = {0};
    readf.read(reinterpret_cast<char*>(header), sizeof(header));
    const int available = static_cast<int>(readf.gcount());

    // Two bytes decide BMP.
    if(available >= 2 && memcmp(header, BMPHeader, sizeof(BMPHeader)) == 0) {
        logImageSignature(header, 2, "BMP格式");
        return BMP_FORMAT;
    }

    // Four bytes are logged for JPG and PNG.
    if(available >= 4) {
        if(memcmp(header, JPGHeader, sizeof(JPGHeader)) == 0) {
            logImageSignature(header, 4, "JPG格式");
            return JPG_FORMAT;
        }
        if(memcmp(header, PNGHeader, sizeof(PNGHeader)) == 0) {
            logImageSignature(header, 4, "PNG格式");
            return PNG_FORMAT;
        }
    }

    // Six bytes decide GIF (87a or 89a).
    if(available >= 6
            && (memcmp(header, GIFHeader1, sizeof(GIFHeader1)) == 0
                || memcmp(header, GIFHeader2, sizeof(GIFHeader2)) == 0)) {
        logImageSignature(header, 6, "GIF格式");
        return GIF_FORMAT;
    }

    logImageSignature(header, available, "不属于以上任何一种格式");
    return NVL_FORMAT;
}
// Returns the format name ("BMP", "JPG", "GIF" or "PNG") of the local file
// that `imageUrl` refers to. Yields "NA" when the URL is empty, the file does
// not exist, or the format is not recognised.
QString ImageInfo::getImageFormat(QString imageUrl)
{
    const QString unknown = "NA";
    if (imageUrl.isEmpty())
        return unknown;

    const QString localPath = QUrl(imageUrl).toLocalFile();
    if (!QFile::exists(localPath))
        return unknown;

    const std::string nativePath = localPath.toStdString();
    switch (getImageFormat(nativePath)) {
    case BMP_FORMAT:
        return "BMP";
    case JPG_FORMAT:
        return "JPG";
    case GIF_FORMAT:
        return "GIF";
    case PNG_FORMAT:
        return "PNG";
    default:
        return unknown;
    }
}
// Returns the size of the local file that `imageUrl` refers to, formatted as a
// base-10 string. The result is "0" when the URL is empty, the file does not
// exist, or it cannot be opened for reading.
QString ImageInfo::getImageSize(QString imageUrl)
{
    long byteCount = 0;

    if (!imageUrl.isEmpty()) {
        const QString localPath = QUrl(imageUrl).toLocalFile();
        if (QFile::exists(localPath)) {
            QFile target(localPath);
            if (target.open(QIODevice::ReadOnly)) {
                byteCount = target.size();
            } else {
                qDebug()<<"打开文件失败";
                byteCount = 0;
            }
            target.close();
        }
    }

    qDebug()<<"!!!!!"<<byteCount;
    const QString text = QString::number(byteCount, 10);
    qDebug()<<text;
    return text;
}
// Reads the file-size field of a BMP header: the four bytes that follow the
// two-byte signature, stored low byte first.
// Returns 0 when the file cannot be opened or is too short.
long ImageInfo::getBMPSize(std::string path)
{
    // Read-only access is sufficient; "rb+" would fail on write-protected files.
    FILE *fid = fopen(path.c_str(), "rb");
    if(fid == NULL) {
        qDebug()<<"打开文件失败";
        return 0;
    }

    // Skip the signature, then read exactly four bytes. Reading sizeof(long)
    // bytes would consume eight bytes on platforms where long is 64-bit.
    unsigned char raw[4] = {0, 0, 0, 0};
    const bool ok = fseek(fid, 2, SEEK_SET) == 0
                 && fread(raw, 1, sizeof(raw), fid) == sizeof(raw);
    fclose(fid);

    long size = 0;
    if(ok) {
        const unsigned long bits = static_cast<unsigned long>(raw[0])
                                 | (static_cast<unsigned long>(raw[1]) << 8)
                                 | (static_cast<unsigned long>(raw[2]) << 16)
                                 | (static_cast<unsigned long>(raw[3]) << 24);
        size = static_cast<long>(bits);
    }
    qDebug()<<"size="<<size;
    return size;
}
// Placeholder: the argument is ignored and the result is always 0.
long ImageInfo::getGIFSize(std::string path)
{
    Q_UNUSED(path);
    const long notImplemented = 0;
    return notImplemented;
}
// Placeholder: the argument is ignored and the result is always 0.
long ImageInfo::getPNGSize(std::string path)
{
    Q_UNUSED(path);
    const long notImplemented = 0;
    return notImplemented;
}
// Returns the size of the file at `path` in bytes, measured by seeking to its
// end. Returns 0 when the file cannot be opened or its length cannot be
// determined.
long ImageInfo::getJPGSize(std::string path)
{
    // Read-only access is sufficient; "rb+" would fail on write-protected files.
    FILE *fid = fopen(path.c_str(), "rb");
    if(fid == NULL) {
        qDebug()<<"打开文件失败";
        return 0;
    }

    long int size = 0;
    if(fseek(fid, 0, SEEK_END) == 0) {
        const long end = ftell(fid);
        if(end >= 0) {   // ftell reports failure with a negative value
            size = end;
        }
    }
    fclose(fid);
    qDebug()<<"size="<<size;
    return size;
}
// Reads the width and height of a BMP file: two consecutive 4-byte fields,
// low byte first, starting at byte offset 18.
// Returns QSize(0, 0) when the file cannot be opened or is too short.
// The sign of each stored value is preserved.
QSize ImageInfo::getBMPDimension(std::string path)
{
    // Read-only access is sufficient; "rb+" would fail on write-protected files.
    FILE *fid = fopen(path.c_str(), "rb");
    if(fid == NULL) {
        qDebug()<<"打开文件失败";
        return QSize(0, 0);
    }

    // Read exactly 4 + 4 bytes. Reading sizeof(long) per field would consume
    // eight bytes each on platforms where long is 64-bit.
    unsigned char raw[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    const bool ok = fseek(fid, 18, SEEK_SET) == 0
                 && fread(raw, 1, sizeof(raw), fid) == sizeof(raw);
    fclose(fid);

    long int width = 0;
    long int height = 0;
    if(ok) {
        const unsigned int w = static_cast<unsigned int>(raw[0])
                             | (static_cast<unsigned int>(raw[1]) << 8)
                             | (static_cast<unsigned int>(raw[2]) << 16)
                             | (static_cast<unsigned int>(raw[3]) << 24);
        const unsigned int h = static_cast<unsigned int>(raw[4])
                             | (static_cast<unsigned int>(raw[5]) << 8)
                             | (static_cast<unsigned int>(raw[6]) << 16)
                             | (static_cast<unsigned int>(raw[7]) << 24);
        // Reinterpret as signed 32-bit so that negative stored values stay negative.
        width = static_cast<int>(w);
        height = static_cast<int>(h);
    }
    qDebug()<<"width="<<width;
    qDebug()<<"height="<<height;
    return QSize(width, height);
}
// Reference: http://mcljc.blog.163.com/blog/static/83949820102239610974/
// http://download.csdn.net/download/chp845/4255011
//
// Walks the JPEG marker segments of the file at `path` until a frame header
// (SOF) is found and returns the width/height stored there.
// Returns QSize(0, 0) when the file cannot be opened or no frame header is
// found before the scan data (SOS), the end marker (EOI) or the end of file.
QSize ImageInfo::getJPGDimension(std::string path)
{
    // Read-only access is sufficient; "rb+" would fail on write-protected files.
    FILE *fid = fopen(path.c_str(), "rb");
    if(fid == NULL) {
        qDebug()<<"打开文件失败";
        return QSize(0, 0);
    }

    long int width = 0;
    long int height = 0;

    // Skip the two signature bytes, then stream through the segments. Every
    // read is checked, so a truncated or malformed file ends the walk instead
    // of running past the data.
    bool walking = (fseek(fid, 2, SEEK_SET) == 0);
    while(walking) {
        // Advance to the next 0xFF, then past any run of 0xFF fill bytes; the
        // first non-0xFF byte after that is the marker code.
        int byte = fgetc(fid);
        while(byte != EOF && byte != 0xFF) {
            byte = fgetc(fid);
        }
        while(byte == 0xFF) {
            byte = fgetc(fid);
        }
        if(byte == EOF) {
            break;
        }
        const int marker = byte;

        // Stuffed zero, TEM and RST0..RST7 carry no length field.
        if(marker == 0x00 || marker == 0x01 || (marker >= 0xD0 && marker <= 0xD7)) {
            continue;
        }
        // EOI or SOS: no frame header can follow.
        if(marker == 0xD9 || marker == 0xDA) {
            break;
        }

        // Two-byte big-endian segment length; it counts itself.
        unsigned char lengthBytes[2];
        if(fread(lengthBytes, 1, sizeof(lengthBytes), fid) != sizeof(lengthBytes)) {
            break;
        }
        const long segmentLength = (lengthBytes[0] << 8) | lengthBytes[1];
        if(segmentLength < 2) {
            break;
        }

        // SOF0..SOF15, excluding DHT (C4), JPG (C8) and DAC (CC), which share
        // the range. This covers progressive files as well as baseline ones.
        const bool isFrameHeader = marker >= 0xC0 && marker <= 0xCF
                                && marker != 0xC4 && marker != 0xC8 && marker != 0xCC;
        if(isFrameHeader) {
            // precision (1 byte), height (2 bytes), width (2 bytes)
            unsigned char frame[5];
            if(fread(frame, 1, sizeof(frame), fid) == sizeof(frame)) {
                height = (frame[1] << 8) | frame[2];
                width = (frame[3] << 8) | frame[4];
            }
            break;
        }

        if(marker == 0xE0) { // APP0 segment
            qDebug()<<"APP0段";
        }

        // Skip the rest of this segment (the length bytes are already consumed).
        walking = (fseek(fid, segmentLength - 2, SEEK_CUR) == 0);
    }
    fclose(fid);

    qDebug()<<"width="<<width;
    qDebug()<<"height="<<height;
    return QSize(width, height);
}
// Reads the width and height of a PNG file: two consecutive 4-byte fields,
// high byte first, starting at byte offset 16.
// Reference: http://blog.chinaunix.net/uid-25799257-id-3358174.html
// Returns QSize(0, 0) when the file cannot be opened or is too short.
QSize ImageInfo::getPNGDimension(std::string path)
{
    // Read-only access is sufficient; "rb+" would fail on write-protected files.
    FILE *fid = fopen(path.c_str(), "rb");
    if(fid == NULL) {
        qDebug()<<"打开文件失败";
        return QSize(0, 0);
    }

    unsigned char raw[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // width bytes, then height bytes
    const bool ok = fseek(fid, 16, SEEK_SET) == 0
                 && fread(raw, 1, sizeof(raw), fid) == sizeof(raw);
    fclose(fid);

    long int width = 0;
    long int height = 0;
    if(ok) {
        // Use all four bytes of each field; combining only the low two bytes
        // would truncate values of 65536 and above.
        width = static_cast<long>((static_cast<unsigned long>(raw[0]) << 24)
                                | (static_cast<unsigned long>(raw[1]) << 16)
                                | (static_cast<unsigned long>(raw[2]) << 8)
                                | static_cast<unsigned long>(raw[3]));
        height = static_cast<long>((static_cast<unsigned long>(raw[4]) << 24)
                                 | (static_cast<unsigned long>(raw[5]) << 16)
                                 | (static_cast<unsigned long>(raw[6]) << 8)
                                 | static_cast<unsigned long>(raw[7]));
    }
    qDebug()<<"width="<<width;
    qDebug()<<"height="<<height;
    return QSize(width, height);
}
// Reads the width and height of a GIF file: two consecutive 2-byte fields,
// low byte first, starting at byte offset 6.
// Reference: http://blog.csdn.net/zhaoweikid/article/details/156422
// Reference: http://blog.csdn.net/asaasa66/article/details/5875340
// Returns QSize(0, 0) when the file cannot be opened or is too short.
QSize ImageInfo::getGIFDimension(std::string path)
{
    std::ifstream ffin(path.c_str(), std::ios::binary);
    if (!ffin){
        std::cout<<"Can not open this file."<<std::endl;
        return QSize(0, 0);
    }

    // Keep the bytes unsigned: where plain char is signed, a byte >= 0x80
    // would be sign-extended when widened and corrupt the combined value.
    unsigned char raw[4] = {0, 0, 0, 0};
    ffin.seekg(6);
    ffin.read(reinterpret_cast<char*>(raw), sizeof(raw));
    const bool ok = (ffin.gcount() == static_cast<std::streamsize>(sizeof(raw)));
    ffin.close();

    long int width = 0;
    long int height = 0;
    if (ok) {
        width = (static_cast<unsigned int>(raw[1]) << 8) | static_cast<unsigned int>(raw[0]);
        height = (static_cast<unsigned int>(raw[3]) << 8) | static_cast<unsigned int>(raw[2]);
    }
    qDebug()<<"width="<<width;
    qDebug()<<"height="<<height;
    return QSize(width, height);
}
// Returns the pixel dimensions of the local file that `imageUrl` refers to,
// using the reader that matches the detected format. The result is a
// default-constructed QSize when the URL is empty, the file does not exist,
// or the format is not recognised. The result is always logged.
QSize ImageInfo::getImageDimension(QString imageUrl)
{
    QSize result;

    if (!imageUrl.isEmpty()) {
        const QString localPath = QUrl(imageUrl).toLocalFile();
        if (QFile::exists(localPath)) {
            const std::string nativePath = localPath.toStdString();
            const int format = getImageFormat(nativePath);
            if (format == BMP_FORMAT) {
                result = getBMPDimension(nativePath);
            } else if (format == JPG_FORMAT) {
                result = getJPGDimension(nativePath);
            } else if (format == GIF_FORMAT) {
                result = getGIFDimension(nativePath);
            } else if (format == PNG_FORMAT) {
                result = getPNGDimension(nativePath);
            }
        }
    }

    qDebug()<<"图片尺寸:"<<result;
    return result;
}
// Returns the base name of the local file that `imageUrl` refers to. Yields an
// empty QString when the URL is empty or the file does not exist.
QString ImageInfo::getImageTitle(QString imageUrl)
{
    if (imageUrl.isEmpty())
        return QString();

    const QString localPath = QUrl(imageUrl).toLocalFile();
    if (!QFile::exists(localPath))
        return QString();

    return QFileInfo(localPath).baseName();
}