pjproject/pjsip/src/pjsip/sip_multipart.c

648 lines
16 KiB
C

/* $Id$ */
/*
* Copyright (C) 2008-2010 Teluu Inc. (http://www.teluu.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <pjsip/sip_multipart.h>
#include <pjsip/sip_parser.h>
#include <pjlib-util/scanner.h>
#include <pj/assert.h>
#include <pj/ctype.h>
#include <pj/errno.h>
#include <pj/except.h>
#include <pj/guid.h>
#include <pj/log.h>
#include <pj/pool.h>
#include <pj/string.h>
#define THIS_FILE "sip_multipart.c"
#define IS_SPACE(c) ((c)==' ' || (c)=='\t')
#if 0
# define TRACE_(x) PJ_LOG(4,x)
#else
# define TRACE_(x)
#endif
extern pj_bool_t pjsip_use_compact_form;
/* Type of "data" in multipart pjsip_msg_body */
struct multipart_data
{
pj_str_t boundary;
pjsip_multipart_part part_head;
};
static int multipart_print_body(struct pjsip_msg_body *msg_body,
char *buf, pj_size_t size)
{
const struct multipart_data *m_data;
pj_str_t clen_hdr = { "Content-Length: ", 16};
pjsip_multipart_part *part;
char *p = buf, *end = buf+size;
#define SIZE_LEFT() (end-p)
m_data = (const struct multipart_data*)msg_body->data;
PJ_ASSERT_RETURN(m_data && !pj_list_empty(&m_data->part_head), PJ_EINVAL);
part = m_data->part_head.next;
while (part != &m_data->part_head) {
enum { CLEN_SPACE = 5 };
char *clen_pos;
const pjsip_hdr *hdr;
clen_pos = NULL;
/* Print delimiter */
if (SIZE_LEFT() <= (m_data->boundary.slen+8) << 1)
return -1;
*p++ = 13; *p++ = 10; *p++ = '-'; *p++ = '-';
pj_memcpy(p, m_data->boundary.ptr, m_data->boundary.slen);
p += m_data->boundary.slen;
*p++ = 13; *p++ = 10;
/* Print optional headers */
hdr = part->hdr.next;
while (hdr != &part->hdr) {
int printed = pjsip_hdr_print_on((pjsip_hdr*)hdr, p, SIZE_LEFT());
if (printed < 0)
return -1;
p += printed;
hdr = hdr->next;
}
/* Automaticly adds Content-Type and Content-Length headers, only
* if content_type is set in the message body.
*/
if (part->body && part->body->content_type.type.slen) {
pj_str_t ctype_hdr = { "Content-Type: ", 14};
const pjsip_media_type *media = &part->body->content_type;
if (pjsip_use_compact_form) {
ctype_hdr.ptr = "c: ";
ctype_hdr.slen = 3;
}
/* Add Content-Type header. */
if ( (end-p) < 24 + media->type.slen + media->subtype.slen) {
return -1;
}
pj_memcpy(p, ctype_hdr.ptr, ctype_hdr.slen);
p += ctype_hdr.slen;
p += pjsip_media_type_print(p, end-p, media);
*p++ = '\r';
*p++ = '\n';
/* Add Content-Length header. */
if ((end-p) < clen_hdr.slen + 12 + 2) {
return -1;
}
pj_memcpy(p, clen_hdr.ptr, clen_hdr.slen);
p += clen_hdr.slen;
/* Print blanks after "Content-Length:", this is where we'll put
* the content length value after we know the length of the
* body.
*/
pj_memset(p, ' ', CLEN_SPACE);
clen_pos = p;
p += CLEN_SPACE;
*p++ = '\r';
*p++ = '\n';
}
/* Empty newline */
*p++ = 13; *p++ = 10;
/* Print the body */
pj_assert(part->body != NULL);
if (part->body) {
int printed = part->body->print_body(part->body, p, SIZE_LEFT());
if (printed < 0)
return -1;
p += printed;
/* Now that we have the length of the body, print this to the
* Content-Length header.
*/
if (clen_pos) {
char tmp[16];
int len;
len = pj_utoa(printed, tmp);
if (len > CLEN_SPACE) len = CLEN_SPACE;
pj_memcpy(clen_pos+CLEN_SPACE-len, tmp, len);
}
}
part = part->next;
}
/* Print closing delimiter */
if (SIZE_LEFT() < m_data->boundary.slen+8)
return -1;
*p++ = 13; *p++ = 10; *p++ = '-'; *p++ = '-';
pj_memcpy(p, m_data->boundary.ptr, m_data->boundary.slen);
p += m_data->boundary.slen;
*p++ = '-'; *p++ = '-'; *p++ = 13; *p++ = 10;
#undef SIZE_LEFT
return p - buf;
}
static void* multipart_clone_data(pj_pool_t *pool, const void *data,
unsigned len)
{
const struct multipart_data *src;
struct multipart_data *dst;
const pjsip_multipart_part *src_part;
PJ_UNUSED_ARG(len);
src = (const struct multipart_data*) data;
dst = PJ_POOL_ALLOC_T(pool, struct multipart_data);
pj_strdup(pool, &dst->boundary, &src->boundary);
src_part = src->part_head.next;
while (src_part != &src->part_head) {
pjsip_multipart_part *dst_part;
const pjsip_hdr *src_hdr;
dst_part = pjsip_multipart_create_part(pool);
src_hdr = src_part->hdr.next;
while (src_hdr != &src_part->hdr) {
pjsip_hdr *dst_hdr = (pjsip_hdr*)pjsip_hdr_clone(pool, src_hdr);
pj_list_push_back(&dst_part->hdr, dst_hdr);
src_hdr = src_hdr->next;
}
dst_part->body = pjsip_msg_body_clone(pool, src_part->body);
pj_list_push_back(&dst->part_head, dst_part);
src_part = src_part->next;
}
return (void*)dst;
}
/*
* Create an empty multipart body.
*/
PJ_DEF(pjsip_msg_body*) pjsip_multipart_create( pj_pool_t *pool,
const pjsip_media_type *ctype,
const pj_str_t *boundary)
{
pjsip_msg_body *body;
pjsip_param *ctype_param;
struct multipart_data *mp_data;
pj_str_t STR_BOUNDARY = { "boundary", 8 };
PJ_ASSERT_RETURN(pool, NULL);
body = PJ_POOL_ZALLOC_T(pool, pjsip_msg_body);
/* content-type */
if (ctype && ctype->type.slen) {
pjsip_media_type_cp(pool, &body->content_type, ctype);
} else {
const pj_str_t STR_MULTIPART = {"multipart", 9};
const pj_str_t STR_MIXED = { "mixed", 5 };
body->content_type.type = STR_MULTIPART;
body->content_type.subtype = STR_MIXED;
}
/* multipart data */
mp_data = PJ_POOL_ZALLOC_T(pool, struct multipart_data);
pj_list_init(&mp_data->part_head);
if (boundary) {
pj_strdup(pool, &mp_data->boundary, boundary);
} else {
pj_create_unique_string(pool, &mp_data->boundary);
}
body->data = mp_data;
/* Add ";boundary" parameter to content_type parameter. */
ctype_param = pjsip_param_find(&body->content_type.param, &STR_BOUNDARY);
if (!ctype_param) {
ctype_param = PJ_POOL_ALLOC_T(pool, pjsip_param);
ctype_param->name = STR_BOUNDARY;
pj_list_push_back(&body->content_type.param, ctype_param);
}
ctype_param->value = mp_data->boundary;
/* function pointers */
body->print_body = &multipart_print_body;
body->clone_data = &multipart_clone_data;
return body;
}
/*
* Create an empty multipart part.
*/
PJ_DEF(pjsip_multipart_part*) pjsip_multipart_create_part(pj_pool_t *pool)
{
pjsip_multipart_part *mp;
mp = PJ_POOL_ZALLOC_T(pool, pjsip_multipart_part);
pj_list_init(&mp->hdr);
return mp;
}
/*
* Deep clone.
*/
PJ_DEF(pjsip_multipart_part*)
pjsip_multipart_clone_part(pj_pool_t *pool,
const pjsip_multipart_part *src)
{
pjsip_multipart_part *dst;
const pjsip_hdr *hdr;
dst = pjsip_multipart_create_part(pool);
hdr = src->hdr.next;
while (hdr != &src->hdr) {
pj_list_push_back(&dst->hdr, pjsip_hdr_clone(pool, hdr));
hdr = hdr->next;
}
dst->body = pjsip_msg_body_clone(pool, src->body);
return dst;
}
/*
* Add a part into multipart bodies.
*/
PJ_DEF(pj_status_t) pjsip_multipart_add_part( pj_pool_t *pool,
pjsip_msg_body *mp,
pjsip_multipart_part *part)
{
struct multipart_data *m_data;
/* All params must be specified */
PJ_ASSERT_RETURN(pool && mp && part, PJ_EINVAL);
/* mp must really point to an actual multipart msg body */
PJ_ASSERT_RETURN(mp->print_body==&multipart_print_body, PJ_EINVAL);
/* The multipart part must contain a valid message body */
PJ_ASSERT_RETURN(part->body && part->body->print_body, PJ_EINVAL);
m_data = (struct multipart_data*)mp->data;
pj_list_push_back(&m_data->part_head, part);
PJ_UNUSED_ARG(pool);
return PJ_SUCCESS;
}
/*
* Get the first part of multipart bodies.
*/
PJ_DEF(pjsip_multipart_part*)
pjsip_multipart_get_first_part(const pjsip_msg_body *mp)
{
struct multipart_data *m_data;
/* Must specify mandatory params */
PJ_ASSERT_RETURN(mp, NULL);
/* mp must really point to an actual multipart msg body */
PJ_ASSERT_RETURN(mp->print_body==&multipart_print_body, NULL);
m_data = (struct multipart_data*)mp->data;
if (pj_list_empty(&m_data->part_head))
return NULL;
return m_data->part_head.next;
}
/*
* Get the next part after the specified part.
*/
PJ_DEF(pjsip_multipart_part*)
pjsip_multipart_get_next_part(const pjsip_msg_body *mp,
pjsip_multipart_part *part)
{
struct multipart_data *m_data;
/* Must specify mandatory params */
PJ_ASSERT_RETURN(mp && part, NULL);
/* mp must really point to an actual multipart msg body */
PJ_ASSERT_RETURN(mp->print_body==&multipart_print_body, NULL);
m_data = (struct multipart_data*)mp->data;
/* the part parameter must be really member of the list */
PJ_ASSERT_RETURN(pj_list_find_node(&m_data->part_head, part) != NULL,
NULL);
if (part->next == &m_data->part_head)
return NULL;
return part->next;
}
/*
* Find a body inside multipart bodies which has the specified content type.
*/
PJ_DEF(pjsip_multipart_part*)
pjsip_multipart_find_part( const pjsip_msg_body *mp,
const pjsip_media_type *content_type,
const pjsip_multipart_part *start)
{
struct multipart_data *m_data;
pjsip_multipart_part *part;
/* Must specify mandatory params */
PJ_ASSERT_RETURN(mp && content_type, NULL);
/* mp must really point to an actual multipart msg body */
PJ_ASSERT_RETURN(mp->print_body==&multipart_print_body, NULL);
m_data = (struct multipart_data*)mp->data;
if (start)
part = start->next;
else
part = m_data->part_head.next;
while (part != &m_data->part_head) {
if (pjsip_media_type_cmp(&part->body->content_type, content_type)==0)
return part;
part = part->next;
}
return NULL;
}
/* Parse a multipart part. "pct" is parent content-type */
static pjsip_multipart_part *parse_multipart_part(pj_pool_t *pool,
char *start,
pj_size_t len,
const pjsip_media_type *pct)
{
pjsip_multipart_part *part = pjsip_multipart_create_part(pool);
char *p = start, *end = start+len, *end_hdr = NULL, *start_body = NULL;
pjsip_ctype_hdr *ctype_hdr = NULL;
TRACE_((THIS_FILE, "Parsing part: begin--\n%.*s\n--end",
(int)len, start));
/* Find the end of header area, by looking at an empty line */
for (;;) {
while (p!=end && *p!='\n') ++p;
if (p==end) {
start_body = end;
break;
}
if ((p==start) || (p==start+1 && *(p-1)=='\r')) {
/* Empty header section */
end_hdr = start;
start_body = ++p;
break;
} else if (p==end-1) {
/* Empty body section */
end_hdr = end;
start_body = ++p;
} else if ((p>=start+1 && *(p-1)=='\n') ||
(p>=start+2 && *(p-1)=='\r' && *(p-2)=='\n'))
{
/* Found it */
end_hdr = (*(p-1)=='\r') ? (p-1) : p;
start_body = ++p;
break;
} else {
++p;
}
}
/* Parse the headers */
if (end_hdr-start > 0) {
pjsip_hdr *hdr;
pj_status_t status;
status = pjsip_parse_headers(pool, start, end_hdr-start,
&part->hdr, 0);
if (status != PJ_SUCCESS) {
PJ_PERROR(2,(THIS_FILE, status, "Warning: error parsing multipart"
" header"));
}
/* Find Content-Type header */
hdr = part->hdr.next;
while (hdr != &part->hdr) {
TRACE_((THIS_FILE, "Header parsed: %.*s", (int)hdr->name.slen,
hdr->name.ptr));
if (hdr->type == PJSIP_H_CONTENT_TYPE) {
ctype_hdr = (pjsip_ctype_hdr*)hdr;
}
hdr = hdr->next;
}
}
/* Assign the body */
part->body = PJ_POOL_ZALLOC_T(pool, pjsip_msg_body);
if (ctype_hdr) {
pjsip_media_type_cp(pool, &part->body->content_type, &ctype_hdr->media);
} else if (pct && pj_stricmp2(&pct->subtype, "digest")==0) {
part->body->content_type.type = pj_str("message");
part->body->content_type.subtype = pj_str("rfc822");
} else {
part->body->content_type.type = pj_str("text");
part->body->content_type.subtype = pj_str("plain");
}
if (start_body < end) {
part->body->data = start_body;
part->body->len = end - start_body;
} else {
part->body->data = (void*)"";
part->body->len = 0;
}
TRACE_((THIS_FILE, "Body parsed: \"%.*s\"", (int)part->body->len,
part->body->data));
part->body->print_body = &pjsip_print_text_body;
part->body->clone_data = &pjsip_clone_text_data;
return part;
}
/* Public function to parse multipart message bodies into its parts */
PJ_DEF(pjsip_msg_body*) pjsip_multipart_parse(pj_pool_t *pool,
char *buf, pj_size_t len,
const pjsip_media_type *ctype,
unsigned options)
{
pj_str_t boundary, delim;
char *curptr, *endptr;
const pjsip_param *ctype_param;
const pj_str_t STR_BOUNDARY = { "boundary", 8 };
pjsip_msg_body *body = NULL;
PJ_ASSERT_RETURN(pool && buf && len && ctype && !options, NULL);
TRACE_((THIS_FILE, "Started parsing multipart body"));
/* Get the boundary value in the ctype */
boundary.ptr = NULL;
boundary.slen = 0;
ctype_param = pjsip_param_find(&ctype->param, &STR_BOUNDARY);
if (ctype_param) {
boundary = ctype_param->value;
TRACE_((THIS_FILE, "Boundary is specified: '%.*s'", (int)boundary.slen,
boundary.ptr));
}
if (!boundary.slen) {
/* Boundary not found or not specified. Try to be clever, get
* the boundary from the body.
*/
char *p=buf, *end=buf+len;
PJ_LOG(4,(THIS_FILE, "Warning: boundary parameter not found or "
"not specified when parsing multipart body"));
/* Find the first "--". This "--" must be right after a CRLF, unless
* it really appears at the start of the buffer.
*/
for (;;) {
while (p!=end && *p!='-') ++p;
if (p!=end && *(p+1)=='-' &&
((p>buf && *(p-1)=='\n') || (p==buf)))
{
p+=2;
break;
} else {
++p;
}
}
if (p==end) {
/* Unable to determine boundary. Maybe this is not a multipart
* message?
*/
PJ_LOG(4,(THIS_FILE, "Error: multipart boundary not specified and"
" unable to calculate from the body"));
return NULL;
}
boundary.ptr = p;
while (p!=end && !pj_isspace(*p)) ++p;
boundary.slen = p - boundary.ptr;
TRACE_((THIS_FILE, "Boundary is calculated: '%.*s'",
(int)boundary.slen, boundary.ptr));
}
/* Build the delimiter:
* delimiter = "--" boundary
*/
delim.slen = boundary.slen+2;
delim.ptr = (char*)pj_pool_alloc(pool, (int)delim.slen);
delim.ptr[0] = '-';
delim.ptr[1] = '-';
pj_memcpy(delim.ptr+2, boundary.ptr, boundary.slen);
/* Start parsing the body, skip until the first delimiter. */
curptr = buf;
endptr = buf + len;
{
pj_str_t body;
body.ptr = buf; body.slen = len;
curptr = pj_strstr(&body, &delim);
if (!curptr)
return NULL;
}
body = pjsip_multipart_create(pool, ctype, &boundary);
for (;;) {
char *start_body, *end_body;
pjsip_multipart_part *part;
/* Eat the boundary */
curptr += delim.slen;
if (*curptr=='-' && curptr<endptr-1 && *(curptr+1)=='-') {
/* Found the closing delimiter */
curptr += 2;
break;
}
/* Optional whitespace after delimiter */
while (curptr!=endptr && IS_SPACE(*curptr)) ++curptr;
/* Mandatory CRLF */
if (*curptr=='\r') ++curptr;
if (*curptr!='\n') {
/* Expecting a newline here */
return NULL;
}
++curptr;
/* We now in the start of the body */
start_body = curptr;
/* Find the next delimiter */
{
pj_str_t subbody;
subbody.ptr = curptr; subbody.slen = endptr - curptr;
curptr = pj_strstr(&subbody, &delim);
if (!curptr) {
/* We're really expecting end delimiter to be found. */
return NULL;
}
}
end_body = curptr;
/* The newline preceeding the delimiter is conceptually part of
* the delimiter, so trim it from the body.
*/
if (*(end_body-1) == '\n')
--end_body;
if (*(end_body-1) == '\r')
--end_body;
/* Now that we have determined the part's boundary, parse it
* to get the header and body part of the part.
*/
part = parse_multipart_part(pool, start_body, end_body - start_body,
ctype);
if (part) {
pjsip_multipart_add_part(pool, body, part);
}
}
return body;
}