bundled_pjproject: Add additional multipart search utils

Added the following APIs:
pjsip_multipart_find_part_by_header()
pjsip_multipart_find_part_by_header_str()
pjsip_multipart_find_part_by_cid_str()
pjsip_multipart_find_part_by_cid_uri()

Change-Id: I6aee3dcf59eb171f93aae0f0564ff907262ef40d
This commit is contained in:
George Joseph 2022-01-21 13:08:11 -07:00 committed by Kevin Harwell
parent 25edc8ba47
commit 3f108867d6
1 changed files with 644 additions and 0 deletions

View File

@ -0,0 +1,644 @@
From b7ecff22e77887626fd8e8608c4dd73bc7b7366f Mon Sep 17 00:00:00 2001
From: George Joseph <gjoseph@sangoma.com>
Date: Tue, 18 Jan 2022 06:14:31 -0700
Subject: [PATCH] Additional multipart improvements
Added the following APIs:
pjsip_multipart_find_part_by_header()
pjsip_multipart_find_part_by_header_str()
pjsip_multipart_find_part_by_cid_str()
pjsip_multipart_find_part_by_cid_uri()
---
pjsip/include/pjsip/sip_multipart.h | 83 ++++++++++
pjsip/src/pjsip/sip_multipart.c | 223 +++++++++++++++++++++++++++
pjsip/src/test/multipart_test.c | 225 +++++++++++++++++++++++++++-
3 files changed, 530 insertions(+), 1 deletion(-)
diff --git a/pjsip/include/pjsip/sip_multipart.h b/pjsip/include/pjsip/sip_multipart.h
index 1c05767c5..c6b82b0b4 100644
--- a/pjsip/include/pjsip/sip_multipart.h
+++ b/pjsip/include/pjsip/sip_multipart.h
@@ -153,6 +153,89 @@ pjsip_multipart_find_part( const pjsip_msg_body *mp,
const pjsip_media_type *content_type,
const pjsip_multipart_part *start);
+/**
+ * Find a body inside multipart bodies which has a header matching the
+ * supplied one. Most useful for finding a part with a specific Content-ID.
+ *
+ * @param pool Memory pool to use for temp space.
+ * @param mp The multipart body.
+ * @param search_hdr Header to search for.
+ * @param start If specified, the search will begin at
+ * start->next part. Otherwise it will begin at
+ * the first part in the multipart bodies.
+ *
+ * @return The first part which has a header matching the
+ * specified one, or NULL if not found.
+ */
+PJ_DECL(pjsip_multipart_part*)
+pjsip_multipart_find_part_by_header(pj_pool_t *pool,
+ const pjsip_msg_body *mp,
+ void *search_hdr,
+ const pjsip_multipart_part *start);
+
+/**
+ * Find a body inside multipart bodies which has a header matching the
+ * supplied name and value. Most useful for finding a part with a specific
+ * Content-ID.
+ *
+ * @param pool Memory pool to use for temp space.
+ * @param mp The multipart body.
+ * @param hdr_name Header name to search for.
+ * @param hdr_value Header value search for.
+ * @param start If specified, the search will begin at
+ * start->next part. Otherwise it will begin at
+ * the first part in the multipart bodies.
+ *
+ * @return The first part which has a header matching the
+ * specified one, or NULL if not found.
+ */
+PJ_DECL(pjsip_multipart_part*)
+pjsip_multipart_find_part_by_header_str(pj_pool_t *pool,
+ const pjsip_msg_body *mp,
+ const pj_str_t *hdr_name,
+ const pj_str_t *hdr_value,
+ const pjsip_multipart_part *start);
+
+
+
+/**
+ * Find a body inside multipart bodies which has a Content-ID value matching the
+ * supplied "cid" URI in pj_str form. The "cid:" scheme will be assumed if the
+ * URL doesn't start with it. Enclosing angle brackets will also be handled
+ * correctly if they exist.
+ *
+ * @see RFC2392 Content-ID and Message-ID Uniform Resource Locators
+ *
+ * @param pool Memory pool to use for temp space.
+ * @param mp The multipart body.
+ * @param cid The "cid" URI to search for in pj_str form.
+ *
+ * @return The first part which has a Content-ID header matching the
+ * specified "cid" URI. or NULL if not found.
+ */
+PJ_DECL(pjsip_multipart_part*)
+pjsip_multipart_find_part_by_cid_str(pj_pool_t *pool,
+ const pjsip_msg_body *mp,
+ pj_str_t *cid);
+
+/**
+ * Find a body inside multipart bodies which has a Content-ID value matching the
+ * supplied "cid" URI.
+ *
+ * @see RFC2392 Content-ID and Message-ID Uniform Resource Locators
+ *
+ * @param pool Memory pool to use for temp space.
+ * @param mp The multipart body.
+ * @param cid The "cid" URI to search for.
+ *
+ * @return The first part which had a Content-ID header matching the
+ * specified "cid" URI. or NULL if not found.
+ */
+PJ_DECL(pjsip_multipart_part*)
+pjsip_multipart_find_part_by_cid_uri(pj_pool_t *pool,
+ const pjsip_msg_body *mp,
+ pjsip_other_uri *cid_uri);
+
/**
* Parse multipart message.
*
diff --git a/pjsip/src/pjsip/sip_multipart.c b/pjsip/src/pjsip/sip_multipart.c
index e7d722d2e..9d8be55b0 100644
--- a/pjsip/src/pjsip/sip_multipart.c
+++ b/pjsip/src/pjsip/sip_multipart.c
@@ -19,6 +19,7 @@
#include <pjsip/sip_multipart.h>
#include <pjsip/sip_parser.h>
#include <pjlib-util/scanner.h>
+#include <pjlib-util/string.h>
#include <pj/assert.h>
#include <pj/ctype.h>
#include <pj/errno.h>
@@ -416,6 +417,220 @@ pjsip_multipart_find_part( const pjsip_msg_body *mp,
return NULL;
}
+/*
+ * Find a body inside multipart bodies which has the header and value.
+ */
+PJ_DEF(pjsip_multipart_part*)
+pjsip_multipart_find_part_by_header_str(pj_pool_t *pool,
+ const pjsip_msg_body *mp,
+ const pj_str_t *hdr_name,
+ const pj_str_t *hdr_value,
+ const pjsip_multipart_part *start)
+{
+ struct multipart_data *m_data;
+ pjsip_multipart_part *part;
+ pjsip_hdr *found_hdr;
+ pj_str_t found_hdr_str;
+ pj_str_t found_hdr_value;
+ pj_size_t expected_hdr_slen;
+ pj_size_t buf_size;
+ int hdr_name_len;
+#define REASONABLE_PADDING 32
+#define SEPARATOR_LEN 2
+ /* Must specify mandatory params */
+ PJ_ASSERT_RETURN(mp && hdr_name && hdr_value, NULL);
+
+ /* mp must really point to an actual multipart msg body */
+ PJ_ASSERT_RETURN(mp->print_body==&multipart_print_body, NULL);
+
+ /*
+ * We'll need to "print" each header we find to test it but
+ * allocating a buffer of PJSIP_MAX_URL_SIZE is overkill.
+ * Instead, we'll allocate one large enough to hold the search
+ * header name, the ": " separator, the search hdr value, and
+ * the NULL terminator. If we can't print the found header
+ * into that buffer then it can't be a match.
+ *
+ * Some header print functions such as generic_int require enough
+ * space to print the maximum possible header length so we'll
+ * add a reasonable amount to the print buffer size.
+ */
+ expected_hdr_slen = hdr_name->slen + SEPARATOR_LEN + hdr_value->slen;
+ buf_size = expected_hdr_slen + REASONABLE_PADDING;
+ found_hdr_str.ptr = pj_pool_alloc(pool, buf_size);
+ found_hdr_str.slen = 0;
+ hdr_name_len = hdr_name->slen + SEPARATOR_LEN;
+
+ m_data = (struct multipart_data*)mp->data;
+
+ if (start)
+ part = start->next;
+ else
+ part = m_data->part_head.next;
+
+ while (part != &m_data->part_head) {
+ found_hdr = NULL;
+ while ((found_hdr = pjsip_hdr_find_by_name(&part->hdr, hdr_name,
+ (found_hdr ? found_hdr->next : NULL))) != NULL) {
+
+ found_hdr_str.slen = pjsip_hdr_print_on((void*) found_hdr, found_hdr_str.ptr, buf_size);
+ /*
+ * If the buffer was too small (slen = -1) or the result wasn't
+ * the same length as the search header, it can't be a match.
+ */
+ if (found_hdr_str.slen != expected_hdr_slen) {
+ continue;
+ }
+ /*
+ * Set the value overlay to start at the found header value...
+ */
+ found_hdr_value.ptr = found_hdr_str.ptr + hdr_name_len;
+ found_hdr_value.slen = found_hdr_str.slen - hdr_name_len;
+ /* ...and compare it to the supplied header value. */
+ if (pj_strcmp(hdr_value, &found_hdr_value) == 0) {
+ return part;
+ }
+ }
+ part = part->next;
+ }
+ return NULL;
+#undef SEPARATOR_LEN
+#undef REASONABLE_PADDING
+}
+
+PJ_DEF(pjsip_multipart_part*)
+pjsip_multipart_find_part_by_header(pj_pool_t *pool,
+ const pjsip_msg_body *mp,
+ void *search_for,
+ const pjsip_multipart_part *start)
+{
+ struct multipart_data *m_data;
+ pjsip_hdr *search_hdr = search_for;
+ pj_str_t search_buf;
+
+ /* Must specify mandatory params */
+ PJ_ASSERT_RETURN(mp && search_hdr, NULL);
+
+ /* mp must really point to an actual multipart msg body */
+ PJ_ASSERT_RETURN(mp->print_body==&multipart_print_body, NULL);
+
+ /*
+ * Unfortunately, there isn't enough information to determine
+ * the maximum printed size of search_hdr at this point so we
+ * have to allocate a reasonable max.
+ */
+ search_buf.ptr = pj_pool_alloc(pool, PJSIP_MAX_URL_SIZE);
+ search_buf.slen = pjsip_hdr_print_on(search_hdr, search_buf.ptr, PJSIP_MAX_URL_SIZE - 1);
+ if (search_buf.slen <= 0) {
+ return NULL;
+ }
+ /*
+ * Set the header value to start after the header name plus the ":", then
+ * strip leading and trailing whitespace.
+ */
+ search_buf.ptr += (search_hdr->name.slen + 1);
+ search_buf.slen -= (search_hdr->name.slen + 1);
+ pj_strtrim(&search_buf);
+
+ return pjsip_multipart_find_part_by_header_str(pool, mp, &search_hdr->name, &search_buf, start);
+}
+
+/*
+ * Convert a Content-ID URI to it's corresponding header value.
+ * RFC2392 says...
+ * A "cid" URL is converted to the corresponding Content-ID message
+ * header by removing the "cid:" prefix, converting the % encoded
+ * character(s) to their equivalent US-ASCII characters, and enclosing
+ * the remaining parts with an angle bracket pair, "<" and ">".
+ *
+ * This implementation will accept URIs with or without the "cid:"
+ * scheme and optional angle brackets.
+ */
+static pj_str_t cid_uri_to_hdr_value(pj_pool_t *pool, pj_str_t *cid_uri)
+{
+ pj_size_t cid_len = pj_strlen(cid_uri);
+ pj_size_t alloc_len = cid_len + 2 /* for the leading and trailing angle brackets */;
+ pj_str_t uri_overlay;
+ pj_str_t cid_hdr;
+ pj_str_t hdr_overlay;
+
+ pj_strassign(&uri_overlay, cid_uri);
+ /* If the URI is already enclosed in angle brackets, remove them. */
+ if (uri_overlay.ptr[0] == '<') {
+ uri_overlay.ptr++;
+ uri_overlay.slen -= 2;
+ }
+ /* If the URI starts with the "cid:" scheme, skip over it. */
+ if (pj_strncmp2(&uri_overlay, "cid:", 4) == 0) {
+ uri_overlay.ptr += 4;
+ uri_overlay.slen -= 4;
+ }
+ /* Start building */
+ cid_hdr.ptr = pj_pool_alloc(pool, alloc_len);
+ cid_hdr.ptr[0] = '<';
+ cid_hdr.slen = 1;
+ hdr_overlay.ptr = cid_hdr.ptr + 1;
+ hdr_overlay.slen = 0;
+ pj_strcpy_unescape(&hdr_overlay, &uri_overlay);
+ cid_hdr.slen += hdr_overlay.slen;
+ cid_hdr.ptr[cid_hdr.slen] = '>';
+ cid_hdr.slen++;
+
+ return cid_hdr;
+}
+
+PJ_DEF(pjsip_multipart_part*)
+pjsip_multipart_find_part_by_cid_str(pj_pool_t *pool,
+ const pjsip_msg_body *mp,
+ pj_str_t *cid)
+{
+ struct multipart_data *m_data;
+ pjsip_multipart_part *part;
+ pjsip_generic_string_hdr *found_hdr;
+ pj_str_t found_hdr_value;
+ static pj_str_t hdr_name = { "Content-ID", 10};
+ pj_str_t hdr_value;
+
+ PJ_ASSERT_RETURN(pool && mp && cid && (pj_strlen(cid) > 0), NULL);
+
+ hdr_value = cid_uri_to_hdr_value(pool, cid);
+ if (pj_strlen(&hdr_value) == 0) {
+ return NULL;
+ }
+
+ m_data = (struct multipart_data*)mp->data;
+ part = m_data->part_head.next;
+
+ while (part != &m_data->part_head) {
+ found_hdr = NULL;
+ while ((found_hdr = pjsip_hdr_find_by_name(&part->hdr, &hdr_name,
+ (found_hdr ? found_hdr->next : NULL))) != NULL) {
+ if (pj_strcmp(&hdr_value, &found_hdr->hvalue) == 0) {
+ return part;
+ }
+ }
+ part = part->next;
+ }
+ return NULL;
+}
+
+PJ_DEF(pjsip_multipart_part*)
+pjsip_multipart_find_part_by_cid_uri(pj_pool_t *pool,
+ const pjsip_msg_body *mp,
+ pjsip_other_uri *cid_uri)
+{
+ PJ_ASSERT_RETURN(pool && mp && cid_uri, NULL);
+
+ if (pj_strcmp2(&cid_uri->scheme, "cid") != 0) {
+ return NULL;
+ }
+ /*
+ * We only need to pass the URI content so we
+ * can do that directly.
+ */
+ return pjsip_multipart_find_part_by_cid_str(pool, mp, &cid_uri->content);
+}
+
/* Parse a multipart part. "pct" is parent content-type */
static pjsip_multipart_part *parse_multipart_part(pj_pool_t *pool,
char *start,
@@ -584,6 +799,7 @@ PJ_DEF(pjsip_msg_body*) pjsip_multipart_parse(pj_pool_t *pool,
(int)boundary.slen, boundary.ptr));
}
+
/* Build the delimiter:
* delimiter = "--" boundary
*/
@@ -630,6 +846,8 @@ PJ_DEF(pjsip_msg_body*) pjsip_multipart_parse(pj_pool_t *pool,
if (*curptr=='\r') ++curptr;
if (*curptr!='\n') {
/* Expecting a newline here */
+ PJ_LOG(2, (THIS_FILE, "Failed to find newline"));
+
return NULL;
}
++curptr;
@@ -645,6 +863,7 @@ PJ_DEF(pjsip_msg_body*) pjsip_multipart_parse(pj_pool_t *pool,
curptr = pj_strstr(&subbody, &delim);
if (!curptr) {
/* We're really expecting end delimiter to be found. */
+ PJ_LOG(2, (THIS_FILE, "Failed to find end-delimiter"));
return NULL;
}
}
@@ -670,9 +889,13 @@ PJ_DEF(pjsip_msg_body*) pjsip_multipart_parse(pj_pool_t *pool,
part = parse_multipart_part(pool, start_body, end_body - start_body,
ctype);
if (part) {
+ TRACE_((THIS_FILE, "Adding part"));
pjsip_multipart_add_part(pool, body, part);
+ } else {
+ PJ_LOG(2, (THIS_FILE, "Failed to add part"));
}
}
+ TRACE_((THIS_FILE, "pjsip_multipart_parse finished: %p", body));
return body;
}
diff --git a/pjsip/src/test/multipart_test.c b/pjsip/src/test/multipart_test.c
index 4f16e68bf..97267a290 100644
--- a/pjsip/src/test/multipart_test.c
+++ b/pjsip/src/test/multipart_test.c
@@ -28,6 +28,7 @@
typedef pj_status_t (*verify_ptr)(pj_pool_t*,pjsip_msg_body*);
static pj_status_t verify1(pj_pool_t *pool, pjsip_msg_body *body);
+static pj_status_t verify2(pj_pool_t *pool, pjsip_msg_body *body);
static struct test_t
{
@@ -68,7 +69,41 @@ static struct test_t
"This is epilogue, which should be ignored too",
&verify1
+ },
+ {
+ /* Content-type */
+ "multipart", "mixed", "12345",
+
+ /* Body: */
+ "This is the prolog, which should be ignored.\r\n"
+ "--12345\r\n"
+ "Content-Type: text/plain\r\n"
+ "Content-ID: <header1@example.org>\r\n"
+ "Content-ID: <\"header1\"@example.org>\r\n"
+ "Content-Length: 13\r\n"
+ "\r\n"
+ "has header1\r\n"
+ "--12345 \t\r\n"
+ "Content-Type: application/pidf+xml\r\n"
+ "Content-ID: <my header2@example.org>\r\n"
+ "Content-ID: <my\xffheader2@example.org>\r\n"
+ "Content-Length: 13\r\n"
+ "\r\n"
+ "has header2\r\n"
+ "--12345\r\n"
+ "Content-Type: text/plain\r\n"
+ "Content-ID: <my header3@example.org>\r\n"
+ "Content-ID: <header1@example.org>\r\n"
+ "Content-ID: <my header4@example.org>\r\n"
+ "Content-Length: 13\r\n"
+ "\r\n"
+ "has header4\r\n"
+ "--12345--\r\n"
+ "This is epilogue, which should be ignored too",
+
+ &verify2
}
+
};
static void init_media_type(pjsip_media_type *mt,
@@ -87,6 +122,192 @@ static void init_media_type(pjsip_media_type *mt,
}
}
+static int verify_hdr(pj_pool_t *pool, pjsip_msg_body *multipart_body,
+ void *hdr, char *part_body)
+{
+ pjsip_media_type mt;
+ pjsip_multipart_part *part;
+ pj_str_t the_body;
+
+
+ part = pjsip_multipart_find_part_by_header(pool, multipart_body, hdr, NULL);
+ if (!part) {
+ return -1;
+ }
+
+ the_body.ptr = (char*)part->body->data;
+ the_body.slen = part->body->len;
+
+ if (pj_strcmp2(&the_body, part_body) != 0) {
+ return -2;
+ }
+
+ return 0;
+}
+
+static int verify_cid_str(pj_pool_t *pool, pjsip_msg_body *multipart_body,
+ pj_str_t cid_url, char *part_body)
+{
+ pjsip_media_type mt;
+ pjsip_multipart_part *part;
+ pj_str_t the_body;
+
+ part = pjsip_multipart_find_part_by_cid_str(pool, multipart_body, &cid_url);
+ if (!part) {
+ return -3;
+ }
+
+ the_body.ptr = (char*)part->body->data;
+ the_body.slen = part->body->len;
+
+ if (pj_strcmp2(&the_body, part_body) != 0) {
+ return -4;
+ }
+
+ return 0;
+}
+
+static int verify_cid_uri(pj_pool_t *pool, pjsip_msg_body *multipart_body,
+ pjsip_other_uri *cid_uri, char *part_body)
+{
+ pjsip_media_type mt;
+ pjsip_multipart_part *part;
+ pj_str_t the_body;
+
+ part = pjsip_multipart_find_part_by_cid_uri(pool, multipart_body, cid_uri);
+ if (!part) {
+ return -5;
+ }
+
+ the_body.ptr = (char*)part->body->data;
+ the_body.slen = part->body->len;
+
+ if (pj_strcmp2(&the_body, part_body) != 0) {
+ return -6;
+ }
+
+ return 0;
+}
+
+static pj_status_t verify2(pj_pool_t *pool, pjsip_msg_body *body)
+{
+ int rc = 0;
+ int rcbase = 300;
+ pjsip_other_uri *cid_uri;
+ pjsip_ctype_hdr *ctype_hdr = pjsip_ctype_hdr_create(pool);
+
+ ctype_hdr->media.type = pj_str("application");
+ ctype_hdr->media.subtype = pj_str("pidf+xml");
+
+ rc = verify_hdr(pool, body, ctype_hdr, "has header2");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("cid:header1@example.org"), "has header1");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("%22header1%22@example.org"), "has header1");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ cid_uri = pjsip_uri_get_uri(pjsip_parse_uri(pool, "<cid:%22header1%22@example.org>",
+ strlen("<cid:%22header1%22@example.org>"), 0));
+ rcbase += 10;
+ rc = verify_cid_uri(pool, body, cid_uri, "has header1");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("<cid:my%20header2@example.org>"), "has header2");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("cid:my%ffheader2@example.org"), "has header2");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ cid_uri = pjsip_uri_get_uri(pjsip_parse_uri(pool, "<cid:my%ffheader2@example.org>",
+ strlen("<cid:my%ffheader2@example.org>"), 0));
+ rcbase += 10;
+ rc = verify_cid_uri(pool, body, cid_uri, "has header2");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("cid:my%20header3@example.org"), "has header4");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("<cid:my%20header4@example.org>"), "has header4");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ cid_uri = pjsip_uri_get_uri(pjsip_parse_uri(pool, "<cid:my%20header4@example.org>",
+ strlen("<cid:my%20header4@example.org>"), 0));
+ rcbase += 10;
+ rc = verify_cid_uri(pool, body, cid_uri, "has header4");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("<my%20header3@example.org>"), "has header4");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ /* These should all fail for malformed or missing URI */
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("cid:"), "has header4");
+ if (!rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str(""), "has header4");
+ if (!rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("<>"), "has header4");
+ if (!rc) {
+ return (rc - rcbase);
+ }
+
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("<cid>"), "has header4");
+ if (!rc) {
+ return (rc - rcbase);
+ }
+
+ /*
+ * This is going to pass but the ' ' in the uri is un-encoded which is invalid
+ * so we should never see it.
+ */
+ rcbase += 10;
+ rc = verify_cid_str(pool, body, pj_str("cid:my header3@example.org"), "has header4");
+ if (rc) {
+ return (rc - rcbase);
+ }
+
+ return 0;
+}
+
static int verify_part(pjsip_multipart_part *part,
char *h_content_type,
char *h_content_subtype,
@@ -236,8 +457,10 @@ static int parse_test(void)
pj_strdup2_with_null(pool, &str, p_tests[i].msg);
body = pjsip_multipart_parse(pool, str.ptr, str.slen, &ctype, 0);
- if (!body)
+ if (!body) {
+ pj_pool_release(pool);
return -100;
+ }
if (p_tests[i].verify) {
rc = p_tests[i].verify(pool, body);
--
2.34.1