你的位置:首页 > Java教程

[Java教程]cJSON序列化工具解读二(数据解析)


cJSON数据解析

关于数据解析部分,其实解析器本质上就是一个自动机,通过递归(或者显式的解析栈)来实现对数据的解析

/* Utility to jump whitespace and cr/lf */
//用于跳过ascii小于32的空白字符static const char *skip(const char *in) { while (in && *in && (unsigned char)*in <= 32) in++; return in;}/* Parse an object - create a new root, and populate. */cJSON *cJSON_ParseWithOpts(const char *value, const char **return_parse_end, int require_null_terminated){ const char *end = 0; cJSON *c = cJSON_New_Item(); ep = 0; if (!c) return 0; /* memory fail */ //根据前几个字符设置c类型并更新读取位置为end end = parse_value(c, skip(value)); if (!end) { cJSON_Delete(c); //解析失败,数据不完整 return 0; } /* parse failure. ep is set. */ /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ if (require_null_terminated)///?? { end = skip(end); if (*end) { cJSON_Delete(c); ep = end; return 0; } } if (return_parse_end) *return_parse_end = end; return c;}/* Default options for cJSON_Parse */cJSON *cJSON_Parse(const char *value) { return cJSON_ParseWithOpts(value, 0, 0); }

①关于重点部分parse_value 对类型解读函数

/* Parser core - when encountering text, process appropriately. */
//将输入字符串解析为具体类型cJSON结构static const char *parse_value(cJSON *item, const char *value){ if (!value) return 0; /* Fail on null. */

  //设置结构的具体类型并且返回下一个将要解读数据的位置 if (!strncmp(value, "null", 4)) { item->type = cJSON_NULL; return value + 4; } if (!strncmp(value, "false", 5)) { item->type = cJSON_False; return value + 5; } if (!strncmp(value, "true", 4)) { item->type = cJSON_True; item->valueint = 1; return value + 4; } if (*value == '\"') { return parse_string(item, value); } if (*value == '-' || (*value >= '0' && *value <= '9')) { return parse_number(item, value); } if (*value == '[') { return parse_array(item, value); } if (*value == '{') { return parse_object(item, value); } ep = value; return 0; /* failure. */}

②解析字符串部分
解析字符串时, 对于转义序列需要进行还原,比如 "\n" 这两个字符应该转换为 '\n' 这个换行符。
当然,如果只有转义字符转换的话,代码不会有这么长, 对于字符串, 还要支持非 ascii 码的字符, 即 Unicode 字符。
这些字符在字符串中会编码为 \uXXXX 形式的转义序列(UTF-16 编码单元, 必要时由两个 \uXXXX 组成代理对), 我们现在需要把它们还原为 1 - 4 个字节的 utf8 编码。

static unsigned parse_hex4(const char *str){  unsigned h = 0;  if (*str >= '0' && *str <= '9')     h += (*str) - '0';  else if (*str >= 'A' && *str <= 'F')    h += 10 + (*str) - 'A';  else if (*str >= 'a' && *str <= 'f')    h += 10 + (*str) - 'a';   else     return 0;  h = h << 4; //*F  str++;  if (*str >= '0' && *str <= '9')    h += (*str) - '0';   else if (*str >= 'A' && *str <= 'F')    h += 10 + (*str) - 'A';   else if (*str >= 'a' && *str <= 'f')     h += 10 + (*str) - 'a';   else    return 0;  h = h << 4;  str++;  if (*str >= '0' && *str <= '9')    h += (*str) - '0';   else if (*str >= 'A' && *str <= 'F')    h += 10 + (*str) - 'A';  else if (*str >= 'a' && *str <= 'f')    h += 10 + (*str) - 'a';  else return 0;  h = h << 4;   str++;  if (*str >= '0' && *str <= '9')    h += (*str) - '0';   else if (*str >= 'A' && *str <= 'F')    h += 10 + (*str) - 'A';  else if (*str >= 'a' && *str <= 'f')    h += 10 + (*str) - 'a';   else     return 0;  return h;}/* Parse the input text into an unescaped cstring, and populate item. */static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };static const char *parse_string(cJSON *item, const char *str){  const char *ptr = str + 1;  char *ptr2;   char *out;  int len = 0;   unsigned uc, uc2;  if (*str != '\"')   {     ep = str;     return 0;   }  /* not a string! */  while(*ptr != '\"' && *ptr && ++len)    if (*ptr++ == '\\') //跳过\续行符      ptr++;  /* Skip escaped quotes. */  //空间申请  out = (char*)cJSON_malloc(len + 1);  /* This is how long we need for the string, roughly. 
*/  if (!out)     return 0;  ptr = str + 1;//跳过“开始  ptr2 = out;  while (*ptr != '\"' && *ptr)  {    if (*ptr != '\\')      *ptr2++ = *ptr++;    else  //转义字符处理    {      ptr++;      switch (*ptr)      {      case 'b': *ptr2++ = '\b';  break;      case 'f': *ptr2++ = '\f';  break;      case 'n': *ptr2++ = '\n';  break;      case 'r': *ptr2++ = '\r';  break;      case 't': *ptr2++ = '\t';  break;      case 'u':   /* transcode utf16 to utf8. */        uc = parse_hex4(ptr + 1);         ptr += 4;  /* get the unicode char. */        if ((uc >= 0xDC00 && uc <= 0xDFFF) || uc == 0)            break;  /* check for invalid.  */        if (uc >= 0xD800 && uc <= 0xDBFF)  /* UTF16 surrogate pairs.  */        {          if (ptr[1] != '\\' || ptr[2] != 'u')              break;  /* missing second-half of surrogate.  */          uc2 = parse_hex4(ptr + 3);          ptr += 6;          if (uc2<0xDC00 || uc2>0xDFFF)              break;  /* invalid second-half of surrogate.  */          uc = 0x10000 + (((uc & 0x3FF) << 10) | (uc2 & 0x3FF));        }        len = 4;         if (uc<0x80)          len = 1;        else if (uc<0x800)          len = 2;         else if (uc<0x10000)           len = 3;        ptr2 += len;        switch (len)        {        case 4:          *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6;        case 3:          *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6;        case 2:          *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6;        case 1:          *--ptr2 = (uc | firstByteMark[len]);        }        ptr2 += len;        break;      default:        *ptr2++ = *ptr; break;      }      ptr++;    }  }  *ptr2 = 0;  if (*ptr == '\"') ptr++;  item->valuestring = out;  item->type = cJSON_String;  return ptr;}

关于具体的字符解析中的编码相关问题,请自行阅读编码相关知识 

③数字解析

/*
 * Parse the input text to generate a number, and populate the result into item.
 *
 * Accepts an optional '-', an integer part, an optional fraction and an
 * optional exponent. Sets item->valuedouble, item->valueint (the double
 * converted to int) and item->type = cJSON_Number.
 *
 * Returns the position of the first character that is not part of the number.
 */
static const char *parse_number(cJSON *item, const char *num)
{
    double n = 0, sign = 1, scale = 0;
    long subscale = 0;      /* exponent magnitude; long so the cap below is portable */
    int signsubscale = 1;

    if (*num == '-')        /* has sign? */
    {
        sign = -1;
        num++;
    }
    if (*num == '0')        /* is zero */
        num++;
    if (*num >= '1' && *num <= '9')     /* integer digits */
    {
        do
        {
            n = (n * 10.0) + (*num++ - '0');
        } while (*num >= '0' && *num <= '9');
    }
    if (*num == '.' && num[1] >= '0' && num[1] <= '9')  /* fractional part? */
    {
        num++;
        do
        {
            n = (n * 10.0) + (*num++ - '0');
            scale--;        /* each fraction digit shifts the decimal point by one */
        } while (*num >= '0' && *num <= '9');
    }
    if (*num == 'e' || *num == 'E')     /* exponent? */
    {
        num++;
        if (*num == '+')
        {
            num++;
        }
        else if (*num == '-')           /* with sign? */
        {
            signsubscale = -1;
            num++;
        }
        while (*num >= '0' && *num <= '9')
        {
            /*
             * Stop accumulating once the exponent is already far beyond what
             * a double can represent; every digit is still consumed. Without
             * this cap a long run of exponent digits overflows the signed
             * accumulator, which is undefined behaviour.
             */
            if (subscale < 100000000L)
                subscale = (subscale * 10) + (*num - '0');
            num++;
        }
    }

    /* number = +/- number.fraction * 10^(+/- exponent) */
    n = sign * n * pow(10.0, (scale + subscale * signsubscale));

    item->valuedouble = n;
    /* NOTE(review): converting a double outside the range of int is undefined;
       clamping needs INT_MAX/INT_MIN from <limits.h> - confirm it is included. */
    item->valueint = (int)n;
    item->type = cJSON_Number;
    return num;
}

④解析数组
解析数组, 需要先遇到 '[' 这个符号, 然后挨个的读取节点内容, 节点使用 ',' 分隔, ',' 前后还可能有空格, 最后以 ']' 结尾。
我们要编写的也是这样。
先创建一个数组对象, 判断是否有儿子, 有的话读取第一个儿子, 然后判断是不是有 逗号, 有的话循环读取后面的儿子。
最后读取 ']' 即可。


/*
 * Build an array from input text.
 *
 * value must point at '['. Elements are parsed one by one with parse_value and
 * chained through next/prev, with item->child pointing at the first element.
 *
 * Returns the position after the closing ']', or 0 on failure. `ep` is set
 * here when the text does not start with '[' or the list is not closed by ']'.
 */
static const char *parse_array(cJSON *item, const char *value)
{
    cJSON *tail = 0;    /* most recently appended element */

    if (*value != '[')
    {
        ep = value;
        return 0;       /* not an array */
    }

    item->type = cJSON_Array;
    value = skip(value + 1);
    if (*value == ']')
        return value + 1;       /* empty array */

    for (;;)
    {
        cJSON *node = cJSON_New_Item();

        if (!tail)
            item->child = node; /* first element becomes the list head */
        if (!node)
            return 0;           /* memory fail */
        if (tail)
        {
            tail->next = node;
            node->prev = tail;
        }
        tail = node;

        /* Skip any spacing, parse the element, skip spacing after it. */
        value = skip(parse_value(node, skip(value)));
        if (!value)
            return 0;           /* element failed to parse */

        if (*value != ',')
            break;
        value++;                /* step over the separator */
    }

    if (*value == ']')
        return value + 1;       /* end of array */

    ep = value;
    return 0;                   /* malformed */
}

⑤解析对象

解析对象和解析数组类似, 只不过对象的每个儿子是一个 key - value 对, key 是字符串, value 可能是任何值, key 和 value 用 ":" 分隔。(注意: 下面贴出的代码实际上是序列化用的 print_object, 并不是解析用的 parse_object。)

/*
 * Render an object to text.
 *
 * item  - the object whose children (key/value pairs) are printed.
 * depth - current nesting depth, used for tab indentation when fmt is set.
 * fmt   - non-zero for formatted output (newlines and tabs), zero for compact.
 * p     - print buffer to append to, or 0 to build a newly allocated string.
 *
 * Returns a pointer to the start of the rendered object: inside p->buffer when
 * p is given, otherwise a newly allocated string. Returns 0 on failure.
 */
static char *print_object(cJSON *item, int depth, int fmt, printbuffer *p)
{
    char **entries = 0, **names = 0;
    char *out = 0, *ptr, *ret, *str;
    int len = 7, i = 0, j;
    int start = 0;      /* offset in p->buffer where this object begins */
    cJSON *child = item->child;
    int numentries = 0, fail = 0;
    size_t tmplen = 0;

    /* Count the number of entries. */
    while (child)
    {
        numentries++;
        child = child->next;
    }

    /* Explicitly handle empty object case */
    if (!numentries)
    {
        if (p)
            out = ensure(p, fmt ? depth + 4 : 3);
        else
            out = (char*)cJSON_malloc(fmt ? depth + 4 : 3);
        if (!out)
            return 0;
        ptr = out;
        *ptr++ = '{';
        if (fmt)
        {
            *ptr++ = '\n';
            for (i = 0; i < depth - 1; i++)
                *ptr++ = '\t';
        }
        *ptr++ = '}';
        *ptr++ = 0;
        return out;
    }

    if (p)
    {
        /* Compose the output directly into the print buffer. */
        start = p->offset;
        len = fmt ? 2 : 1;
        ptr = ensure(p, len + 1);
        if (!ptr)
            return 0;
        *ptr++ = '{';
        if (fmt)
            *ptr++ = '\n';
        *ptr = 0;
        p->offset += len;

        child = item->child;
        depth++;
        while (child)
        {
            if (fmt)
            {
                ptr = ensure(p, depth);
                if (!ptr)
                    return 0;
                for (j = 0; j < depth; j++)
                    *ptr++ = '\t';
                p->offset += depth;
            }

            /* A failed key or value aborts the object, as in the unbuffered branch. */
            if (!print_string_ptr(child->string, p))
                return 0;
            p->offset = update(p);

            len = fmt ? 2 : 1;
            ptr = ensure(p, len);
            if (!ptr)
                return 0;
            *ptr++ = ':';
            if (fmt)
                *ptr++ = '\t';
            p->offset += len;

            if (!print_value(child, depth, fmt, p))
                return 0;
            p->offset = update(p);

            len = (fmt ? 1 : 0) + (child->next ? 1 : 0);
            ptr = ensure(p, len + 1);
            if (!ptr)
                return 0;
            if (child->next)
                *ptr++ = ',';
            if (fmt)
                *ptr++ = '\n';
            *ptr = 0;
            p->offset += len;

            child = child->next;
        }

        ptr = ensure(p, fmt ? (depth + 1) : 2);
        if (!ptr)
            return 0;
        /* Uses j, not the start offset, as the counter: the original loop
           reused the variable holding the offset and clobbered it. */
        if (fmt)
            for (j = 0; j < depth - 1; j++)
                *ptr++ = '\t';
        *ptr++ = '}';
        *ptr = 0;
        /* Recomputed from p->buffer because ensure() may have moved it. */
        out = (p->buffer) + start;
    }
    else
    {
        /* Allocate space for the names and the objects */
        entries = (char**)cJSON_malloc(numentries * sizeof(char*));
        if (!entries)
            return 0;
        names = (char**)cJSON_malloc(numentries * sizeof(char*));
        if (!names)
        {
            cJSON_free(entries);
            return 0;
        }
        memset(entries, 0, sizeof(char*) * numentries);
        memset(names, 0, sizeof(char*) * numentries);

        /* Collect all the results into our arrays: */
        child = item->child;
        depth++;
        if (fmt)
            len += depth;
        while (child)
        {
            names[i] = str = print_string_ptr(child->string, 0);
            entries[i++] = ret = print_value(child, depth, fmt, 0);
            if (str && ret)
                len += strlen(ret) + strlen(str) + 2 + (fmt ? 2 + depth : 0);
            else
                fail = 1;
            child = child->next;
        }

        /* Try to allocate the output string */
        if (!fail)
            out = (char*)cJSON_malloc(len);
        if (!out)
            fail = 1;

        /* Handle failure */
        if (fail)
        {
            for (i = 0; i < numentries; i++)
            {
                if (names[i])
                    cJSON_free(names[i]);
                if (entries[i])
                    cJSON_free(entries[i]);
            }
            cJSON_free(names);
            cJSON_free(entries);
            return 0;
        }

        /* Compose the output: */
        *out = '{';
        ptr = out + 1;
        if (fmt)
            *ptr++ = '\n';
        *ptr = 0;
        for (i = 0; i < numentries; i++)
        {
            if (fmt)
                for (j = 0; j < depth; j++)
                    *ptr++ = '\t';
            tmplen = strlen(names[i]);
            memcpy(ptr, names[i], tmplen);
            ptr += tmplen;
            *ptr++ = ':';
            if (fmt)
                *ptr++ = '\t';
            strcpy(ptr, entries[i]);
            ptr += strlen(entries[i]);
            if (i != numentries - 1)
                *ptr++ = ',';
            if (fmt)
                *ptr++ = '\n';
            *ptr = 0;
            cJSON_free(names[i]);
            cJSON_free(entries[i]);
        }

        cJSON_free(names);
        cJSON_free(entries);
        if (fmt)
            for (i = 0; i < depth - 1; i++)
                *ptr++ = '\t';
        *ptr++ = '}';
        *ptr++ = 0;
    }
    return out;
}

这样都实现后, 字符串解析为 json 对象就实现了。

⑥序列化

序列化也就是格式化输出了。

序列化又分为格式化输出和压缩输出两种。

 

/* Render a cJSON item/entity/structure to formatted text (newly allocated). */
char *cJSON_Print(cJSON *item)
{
    return print_value(item, 0, 1, 0);
}

/* Render a cJSON item to compact text with no formatting (newly allocated). */
char *cJSON_PrintUnformatted(cJSON *item)
{
    return print_value(item, 0, 0, 0);
}

/*
 * Render a cJSON item into a print buffer that starts out `prebuffer` bytes
 * large.
 *
 * item      - the item to render; 0 yields 0.
 * prebuffer - initial buffer size in bytes.
 * fmt       - non-zero for formatted output, zero for compact.
 *
 * Returns whatever print_value returns for the buffered rendering, or 0 if
 * item is NULL or the initial allocation fails.
 */
char *cJSON_PrintBuffered(cJSON *item, int prebuffer, int fmt)
{
    printbuffer p;

    /* print_value returns 0 for a NULL item; bail out before allocating so
       the prebuffer is not leaked in that case. */
    if (!item)
        return 0;

    p.buffer = (char*)cJSON_malloc(prebuffer);
    if (!p.buffer)
        return 0;
    p.length = prebuffer;
    p.offset = 0;

    /* NOTE(review): if print_value fails for another reason, who owns p.buffer
       depends on ensure(), which is not visible here - confirm before freeing. */
    return print_value(item, 0, fmt, &p);
}

/*
 * Render a value to text.
 *
 * Dispatches on the low byte of item->type. With a print buffer `p` the text
 * is appended to it; with p == 0 a newly allocated string is produced.
 *
 * Returns a pointer to the rendered text, or 0 if item is NULL, the type is
 * not one of the handled cases, or the underlying printer fails.
 */
static char *print_value(cJSON *item, int depth, int fmt, printbuffer *p)
{
    char *out = 0;

    if (!item)
        return 0;

    switch ((item->type) & 255)
    {
    case cJSON_NULL:
        if (p)
        {
            out = ensure(p, 5);
            if (out)
                strcpy(out, "null");
        }
        else
        {
            out = cJSON_strdup("null");
        }
        break;

    case cJSON_False:
        if (p)
        {
            out = ensure(p, 6);
            if (out)
                strcpy(out, "false");
        }
        else
        {
            out = cJSON_strdup("false");
        }
        break;

    case cJSON_True:
        if (p)
        {
            out = ensure(p, 5);
            if (out)
                strcpy(out, "true");
        }
        else
        {
            out = cJSON_strdup("true");
        }
        break;

    case cJSON_Number:
        out = print_number(item, p);
        break;

    case cJSON_String:
        out = print_string(item, p);
        break;

    case cJSON_Array:
        out = print_array(item, depth, fmt, p);
        break;

    case cJSON_Object:
        out = print_object(item, depth, fmt, p);
        break;

    default:
        /* Unhandled type: out stays 0. */
        break;
    }

    return out;
}

 

假设我们要使用格式化输出, 也就是美化输出。

cjson 的做法不是边分析 json 边输出, 而是预先将要输出的内容全部按字符串存在内存中, 最后输出整个字符串。

这对于比较大的 json 来说, 内存就是个问题了。

另外,格式化输出依靠的是节点的深度, 这个也可以优化, 一般宽度超过80 时, 就需要从新的一行算起的。

/*
 * Render an object to text.
 *
 * item  - the object whose children (key/value pairs) are printed.
 * depth - current nesting depth, used for tab indentation when fmt is set.
 * fmt   - non-zero for formatted output (newlines and tabs), zero for compact.
 * p     - print buffer to append to, or 0 to build a newly allocated string.
 *
 * Returns a pointer to the start of the rendered object: inside p->buffer when
 * p is given, otherwise a newly allocated string. Returns 0 on failure.
 */
static char *print_object(cJSON *item, int depth, int fmt, printbuffer *p)
{
    char **entries = 0, **names = 0;
    char *out = 0, *ptr, *ret, *str;
    int len = 7, i = 0, j;
    int start = 0;      /* offset in p->buffer where this object begins */
    cJSON *child = item->child;
    int numentries = 0, fail = 0;
    size_t tmplen = 0;

    /* Count the number of entries. */
    while (child)
    {
        numentries++;
        child = child->next;
    }

    /* Explicitly handle empty object case */
    if (!numentries)
    {
        if (p)
            out = ensure(p, fmt ? depth + 4 : 3);
        else
            out = (char*)cJSON_malloc(fmt ? depth + 4 : 3);
        if (!out)
            return 0;
        ptr = out;
        *ptr++ = '{';
        if (fmt)
        {
            *ptr++ = '\n';
            for (i = 0; i < depth - 1; i++)
                *ptr++ = '\t';
        }
        *ptr++ = '}';
        *ptr++ = 0;
        return out;
    }

    if (p)
    {
        /* Compose the output directly into the print buffer. */
        start = p->offset;
        len = fmt ? 2 : 1;
        ptr = ensure(p, len + 1);
        if (!ptr)
            return 0;
        *ptr++ = '{';
        if (fmt)
            *ptr++ = '\n';
        *ptr = 0;
        p->offset += len;

        child = item->child;
        depth++;
        while (child)
        {
            if (fmt)
            {
                ptr = ensure(p, depth);
                if (!ptr)
                    return 0;
                for (j = 0; j < depth; j++)
                    *ptr++ = '\t';
                p->offset += depth;
            }

            /* A failed key or value aborts the object, as in the unbuffered branch. */
            if (!print_string_ptr(child->string, p))
                return 0;
            p->offset = update(p);

            len = fmt ? 2 : 1;
            ptr = ensure(p, len);
            if (!ptr)
                return 0;
            *ptr++ = ':';
            if (fmt)
                *ptr++ = '\t';
            p->offset += len;

            if (!print_value(child, depth, fmt, p))
                return 0;
            p->offset = update(p);

            len = (fmt ? 1 : 0) + (child->next ? 1 : 0);
            ptr = ensure(p, len + 1);
            if (!ptr)
                return 0;
            if (child->next)
                *ptr++ = ',';
            if (fmt)
                *ptr++ = '\n';
            *ptr = 0;
            p->offset += len;

            child = child->next;
        }

        ptr = ensure(p, fmt ? (depth + 1) : 2);
        if (!ptr)
            return 0;
        /* Uses j, not the start offset, as the counter: the original loop
           reused the variable holding the offset and clobbered it. */
        if (fmt)
            for (j = 0; j < depth - 1; j++)
                *ptr++ = '\t';
        *ptr++ = '}';
        *ptr = 0;
        /* Recomputed from p->buffer because ensure() may have moved it. */
        out = (p->buffer) + start;
    }
    else
    {
        /* Allocate space for the names and the objects */
        entries = (char**)cJSON_malloc(numentries * sizeof(char*));
        if (!entries)
            return 0;
        names = (char**)cJSON_malloc(numentries * sizeof(char*));
        if (!names)
        {
            cJSON_free(entries);
            return 0;
        }
        memset(entries, 0, sizeof(char*) * numentries);
        memset(names, 0, sizeof(char*) * numentries);

        /* Collect all the results into our arrays: */
        child = item->child;
        depth++;
        if (fmt)
            len += depth;
        while (child)
        {
            names[i] = str = print_string_ptr(child->string, 0);
            entries[i++] = ret = print_value(child, depth, fmt, 0);
            if (str && ret)
                len += strlen(ret) + strlen(str) + 2 + (fmt ? 2 + depth : 0);
            else
                fail = 1;
            child = child->next;
        }

        /* Try to allocate the output string */
        if (!fail)
            out = (char*)cJSON_malloc(len);
        if (!out)
            fail = 1;

        /* Handle failure */
        if (fail)
        {
            for (i = 0; i < numentries; i++)
            {
                if (names[i])
                    cJSON_free(names[i]);
                if (entries[i])
                    cJSON_free(entries[i]);
            }
            cJSON_free(names);
            cJSON_free(entries);
            return 0;
        }

        /* Compose the output: */
        *out = '{';
        ptr = out + 1;
        if (fmt)
            *ptr++ = '\n';
        *ptr = 0;
        for (i = 0; i < numentries; i++)
        {
            if (fmt)
                for (j = 0; j < depth; j++)
                    *ptr++ = '\t';
            tmplen = strlen(names[i]);
            memcpy(ptr, names[i], tmplen);
            ptr += tmplen;
            *ptr++ = ':';
            if (fmt)
                *ptr++ = '\t';
            strcpy(ptr, entries[i]);
            ptr += strlen(entries[i]);
            if (i != numentries - 1)
                *ptr++ = ',';
            if (fmt)
                *ptr++ = '\n';
            *ptr = 0;
            cJSON_free(names[i]);
            cJSON_free(entries[i]);
        }

        cJSON_free(names);
        cJSON_free(entries);
        if (fmt)
            for (i = 0; i < depth - 1; i++)
                *ptr++ = '\t';
        *ptr++ = '}';
        *ptr++ = 0;
    }
    return out;
}

 

/*
 * Build an array from input text.
 *
 * value must point at '['. Elements are parsed one by one with parse_value and
 * chained through next/prev, with item->child pointing at the first element.
 *
 * Returns the position after the closing ']', or 0 on failure. `ep` is set
 * here when the text does not start with '[' or the list is not closed by ']'.
 */
static const char *parse_array(cJSON *item, const char *value)
{
    cJSON *tail = 0;    /* most recently appended element */

    if (*value != '[')
    {
        ep = value;
        return 0;       /* not an array */
    }

    item->type = cJSON_Array;
    value = skip(value + 1);
    if (*value == ']')
        return value + 1;       /* empty array */

    for (;;)
    {
        cJSON *node = cJSON_New_Item();

        if (!tail)
            item->child = node; /* first element becomes the list head */
        if (!node)
            return 0;           /* memory fail */
        if (tail)
        {
            tail->next = node;
            node->prev = tail;
        }
        tail = node;

        /* Skip any spacing, parse the element, skip spacing after it. */
        value = skip(parse_value(node, skip(value)));
        if (!value)
            return 0;           /* element failed to parse */

        if (*value != ',')
            break;
        value++;                /* step over the separator */
    }

    if (*value == ']')
        return value + 1;       /* end of array */

    ep = value;
    return 0;                   /* malformed */
}