Parse JSON into a hierarchical table

Guy Hengel [angelIII / a3]Billing Engineer
CERTIFIED EXPERT
Published:
JSON is being used more and more, besides XML, and you surely wanted to parse the data out into SQL instead of doing it in some Javascript. The below function in SQL Server can do the job for you, returning a quick table with the parsed data.
I came up with this article when in my job, I had a request to (fetch and) and parse JSON data inside of a SQL Server procedure.

So, I can provide here the function that is the outcome of that work, for your free usage. It should be noted that it will just do the "raw" parsing, but not "decode" the JSON string values, and does also not determine if the numbers defined are really in a correct notation.

The rules I considered to implement in this function are from this page: http://www.json.org/
 

create function dbo.jsontotable(@json varchar(max))
                      returns @table table (id int identity, name varchar(max), value varchar(max), level int , object_type varchar(100) )
                      as
                      begin
                        -- http://www.json.org/
                        -- we want to partse out objects:
                        -- object : { string : value <, repeat> }
                        -- string : " <anything except \ and "> or <\ followed by "\/bfnrt>  or < \unnnn >  repeat"
                        -- array : [  value <, repeat>]
                        -- value : object (starting with { ) or string starting with " ) or array (starting with [ ) or true or false or null or number
                        -- number optionally -    digit(s)  optionally . followed by digits  and/or optionall e/E followed by digits
                        -- so, we part character by character, waiting for the next special start/end to happen
                        declare @pos int
                        declare @len int
                        declare @c char(1)
                      
                        declare @level int = 0
                      
                        declare @status int = 0
                        -- 0: should start object : {
                        -- 1: should start name :   "
                        -- 2: waiting for end of string (") of name
                        -- 3: should get :
                        -- 4: check value start : could be " for value or [ for array  or { for object or true or false or null or number  could be ] as array end
                        -- 5: waiting for end of string (") of value (special handling of \)
                        -- 9: waiting for end of number
                        --10: checking next part, could be , to continue, or ] end of array or } end object
                        --11: checking next array part, could be status 0 or 1
                      
                        declare @name varchar(max)
                        declare @value varchar(max)
                        declare @name_start int
                        declare @value_start int
                       
                        declare @object_type varchar(100)
                        declare @array_levels varchar(100) = ','
                      
                        set @pos = 1
                        set @len = len(@json)
                      
                       
                        while @pos <= @len
                        begin
                          set @c = substring(@json, @pos, 1)
                      
                          if @c = ' '
                          or @c = char(9) -- tab
                          or @c = char(10) -- nl
                          or @c = char(13) -- cr
                          begin
                              -- skip this character to ignore whitespace between tokens
                              goto next_loop
                          end
                      
                          -- waiting for object start
                          if @status in (0 ,11)
                          begin
                              if @c = '{'
                              begin    
                                  insert into @table ( name , object_type, value, level) values (null, 'object' , null, @level)                 
                                  set @status = 1 -- wait for object name, starting with "
                                  set @level = @level + 1
                              end
                              else
                              begin
                                  if @status = 0
                                  begin
                                      insert into @table ( object_type ) values ( 'expected object start' )
                                      return
                                  end
                              end
                              goto next_loop
                          end -- if @status = 0
                      
                          --waiting for pair name
                          if @status in ( 1,11)
                          begin
                              if @c = '"'
                              begin
                                  set @name_start = @pos+1
                                  set @object_type = 'value'
                                  set @status = 2
                              end
                              else
                              begin
                                  if @status = 1
                                  begin
                                      insert into @table ( object_type, value , level) values ( 'expected string start' , @c, @level)
                                      return
                                  end
                              end
                              goto next_loop
                          end
                      
                          --waiting for string end
                          if @status in ( 2,5 )
                          begin
                              if @c = '\'
                              begin
                                  --skip 1 position to bypass any " at that place
                                  set @pos = @pos + 1
                              end
                              else
                              begin
                                  if @c = '"'
                                  begin
                                      if @status = 2
                                      begin
                                          -- get the unescaped name (may contain \ ...)
                                          set @name = substring(@json, @name_start, @pos - @name_start)
                                          set @status = 3
                                      end
                                      else
                                      begin
                                          -- get the unescaped value (may contain \ ...)
                                          set @value = substring(@json, @value_start, @pos - @value_start)
                                          insert into @table ( object_type, value, name, level ) values ( @object_type, @value , @name, @level)
                                          -- after the value, check what comes next
                                          set @status = 4
                                      end
                                  end
                              end
                              goto next_loop
                          end -- if @status in ( 2,5 )
                      
                          --checking for : to delimit the name:value pair
                          if @status = 3
                          begin
                            if @c = ':'
                            begin
                              set @status = 4
                            end
                            else
                            begin
                              insert into @table ( object_type, value , level) values ( 'expected column' , @c, @level)
                              return
                            end
                            goto next_loop
                          end -- if @status in ( 3 )
                      
                          if @status in ( 4 )
                          begin
                              if @c = '"'
                              begin
                                --if @array_levels like '%,'  +cast(@level as varchar(10)) + ',%'
                                begin
                                    set @value_start = @pos + 1
                                    set @status = 5            
                                end
                              end
                              
                              if @c = ']'
                              begin
                                  --go down 1 level
                                  set @level = @level - 1
                                  --we continue to wait for an object or next step
                                  set @status = 4
                              end
                                      -- 4: check value start : could be " for value or [ for array  or { for object or true or false or null or number  could be ] as array end
                              if @c = '['
                              begin
                                  set @object_type = 'array'
                                  insert into @table ( name , object_type, level) values (@name, @object_type , @level)
                                  --go up 1 level
                                  set @object_type = 'item'
                                  set @name = null
                                  set @level = @level + 1
                                  --we continue to wait for an object
                                  set @status = 4
                              end
                      
                              if @c = '{'
                              begin
                                  set @object_type = 'object'
                                  --we want now to start the object name
                                  set @status = 0
                                  set @pos = @pos - 1
                              end
                              if @c = '}'
                              begin
                                  --go down 1 level
                                  set @level = @level - 1
                                  --what's next
                                  set @status = 4            
                              end
                              if @c = 'n'
                              begin -- starting null?
                                  if substring(@json, @pos, 4) = 'null'
                                  begin
                                      insert into @table ( name , object_type, level) values (@name, 'null', @level)                 
                                      set @pos = @pos +3
                                      set @status = 4
                                  end
                                  else
                                  begin
                                      insert into @table ( value , object_type, level) values (substring(@json, @pos, 4), 'expected null', @level)                                 
                                  end
                                  --what comes next?
                                  set @status = 4
                              end
                      
                              if @c = 't'
                              begin -- starting null?
                                  if substring(@json, @pos, 4) = 'true'
                                  begin
                                      insert into @table ( name , object_type, value, level) values (@name, 'boolean' , 'true', @level)                 
                                      set @pos = @pos +3
                                      set @status = 4
                                  end
                                  else
                                  begin
                                      insert into @table ( value , object_type, level) values (substring(@json, @pos, 4), 'expected true', @level)                                 
                                  end
                                  --what comes next?
                                  set @status = 4
                              end
                      
                              if @c = 'f'
                              begin -- starting null?
                                  if substring(@json, @pos, 5) = 'false'
                                  begin
                                      insert into @table ( name , object_type, value, level) values (@name, 'boolean' , 'false', @level)                 
                                      set @pos = @pos +4
                                      set @status = 4
                                  end
                                  else
                                  begin
                                      insert into @table ( value , object_type, level) values (substring(@json, @pos, 5), 'expected false', @level)                                 
                                  end
                                  --what comes next?
                                  set @status = 4
                              end
                      
                              if @c = ','
                              begin
                                set @name = null
                                --maybe object  or pair
                                -- are we inside an object or an array;
                                select top 1 @status = case when object_type = 'object' then 1 else 4 end   
                                  from @table where level = @level - 1 order by id desc
                                
                              end
                      
                              if @c like '[0-9-]'
                              begin
                                  set @status = 9
                                  set @value_start = @pos
                                  set @object_type ='number'
                              end
                              
                              goto next_loop
                            end-- if @status in ( 4 )
                      
                      
                            if @status = 9
                            begin
                              if @c like '[eE.0-9+-]'
                              begin
                                  -- continue waiting
                                  set @status = 9
                              end
                              else
                              begin
                                  set @value = substring(@json, @value_start, @pos - @value_start)
                                  insert into @table ( object_type, value, name, level ) values ( @object_type, @value , @name, @level)
                                  --get next part
                                  set @status = 4
                                  set @pos = @pos -1
                              end
                              goto next_loop
                      
                            end
                      
                          --insert into @table ( name ) values (@c)
                          next_loop:
                          set @pos = @pos + 1
                        end -- loop
                      
                        return
                      end

Open in new window

image.gifimage.gif Here a small usage example:


declare @json varchar(max)
                      set @json = ' {    "id"
                       : "9 \\a","ob":[{"a":true}
                       ,{"a":"C","c":-1.4e+4}]
                       ,"x":["a","B"]
                       }'
                      select * from dbo.jsontotable(@json )

Open in new window

image.gifimage.gifWhich has the following output, indicating the following columns:
  • id: identity column, order of the parsing
  • name: name of value pair
  • value: value of value pair
  • level: the hierarchy level
  • object type: (object, array, value, boolean, number, null ...)
EE_SQL_JSON.bmpI tested this function with a complex JSON string 50kb in size; it parses in less than a second.
 
1
2,991 Views
Guy Hengel [angelIII / a3]Billing Engineer
CERTIFIED EXPERT

Comments (1)

Guy Hengel [angelIII / a3]Billing Engineer
CERTIFIED EXPERT
Most Valuable Expert 2014
Top Expert 2009

Author

Commented:
While using this, I found that a field "parentid" will be helpful, so I added this into the function like this:

create function dbo.jsontotable(@json varchar(max))
returns @table table (id int identity, name varchar(max), value varchar(max), level int , object_type varchar(100) , parentid int )
as
begin
  -- http://www.json.org/
  -- we want to partse out objects:
  -- object : { string : value <, repeat> }
  -- string : " <anything except \ and "> or <\ followed by "\/bfnrt>  or < \unnnn >  repeat"
  -- array : [  value <, repeat>]
  -- value : object (starting with { ) or string starting with " ) or array (starting with [ ) or true or false or null or number
  -- number optionally -    digit(s)  optionally . followed by digits  and/or optionall e/E followed by digits
  -- so, we part character by character, waiting for the next special start/end to happen
  declare @pos int
  declare @len int
  declare @c char(1)

  declare @level int = 0

  declare @status int = 0
  -- 0: should start object : {
  -- 1: should start name :   "
  -- 2: waiting for end of string (") of name
  -- 3: should get :
  -- 4: check value start : could be " for value or [ for array  or { for object or true or false or null or number  could be ] as array end
  -- 5: waiting for end of string (") of value (special handling of \)
  -- 9: waiting for end of number
  --10: checking next part, could be , to continue, or ] end of array or } end object
  --11: checking next array part, could be status 0 or 1

  declare @name varchar(max)
  declare @value varchar(max)
  declare @name_start int
  declare @value_start int
  declare @parentid int
 
  declare @object_type varchar(100)
  declare @array_levels varchar(100) = ','

  set @pos = 1
  set @len = len(@json)

 
  while @pos <= @len
  begin
    set @c = substring(@json, @pos, 1)

    if @c = ' '
    or @c = char(9) -- tab
    or @c = char(10) -- nl
    or @c = char(13) -- cr
    begin
        -- skip this character to ignore whitespace between tokens
        goto next_loop
    end

    -- waiting for object start
    if @status in (0 ,11)
    begin
        if @c = '{'
        begin    
			select @parentid  = max(id) from @table where level = @level-1
            insert into @table ( name , object_type, value, level, parentid) values (null, 'object' , null, @level, @parentid )                 
            set @status = 1 -- wait for object name, starting with "
            set @level = @level + 1
        end
        else
        begin
            if @status = 0
            begin
				insert into @table ( object_type ) values ( 'expected object start' )
                return
            end
        end
        goto next_loop
    end -- if @status = 0

    --waiting for pair name
    if @status in ( 1,11)
    begin
        if @c = '"'
        begin
            set @name_start = @pos+1
            set @object_type = 'value'
            set @status = 2
        end
        else
        begin
            if @status = 1
            begin
                insert into @table ( object_type, value , level) values ( 'expected string start' , @c, @level)
                return
            end
        end
        goto next_loop
    end

    --waiting for string end
    if @status in ( 2,5 )
    begin
        if @c = '\'
        begin
            --skip 1 position to bypass any " at that place
            set @pos = @pos + 1
        end
        else
        begin
            if @c = '"'
            begin
                if @status = 2
                begin
                    -- get the unescaped name (may contain \ ...)
                    set @name = substring(@json, @name_start, @pos - @name_start)
                    set @status = 3
                end
                else
                begin
                    -- get the unescaped value (may contain \ ...)
                    set @value = substring(@json, @value_start, @pos - @value_start)
					select @parentid  = max(id) from @table where level = @level-1
                    insert into @table ( object_type, value, name, level, parentid) values ( @object_type, @value , @name, @level, @parentid)
                    -- after the value, check what comes next
                    set @status = 4
                end
            end
        end
        goto next_loop
    end -- if @status in ( 2,5 )

    --checking for : to delimit the name:value pair
    if @status = 3
    begin
      if @c = ':'
      begin
        set @status = 4
      end
      else
      begin
        insert into @table ( object_type, value , level) values ( 'expected column' , @c, @level)
        return
      end
      goto next_loop
    end -- if @status in ( 3 )

    if @status in ( 4 )
    begin
        if @c = '"'
        begin
          --if @array_levels like '%,'  +cast(@level as varchar(10)) + ',%'
          begin
              set @value_start = @pos + 1
              set @status = 5            
          end
        end
        
        if @c = ']'
        begin
            --go down 1 level
            set @level = @level - 1
            --we continue to wait for an object or next step
            set @status = 4
        end
                -- 4: check value start : could be " for value or [ for array  or { for object or true or false or null or number  could be ] as array end
        if @c = '['
        begin
            set @object_type = 'array'
			select @parentid  = max(id) from @table where level = @level-1
            insert into @table ( name , object_type, level, parentid) values (@name, @object_type , @level,@parentid)
            --go up 1 level
            set @object_type = 'item'
            set @name = null
            set @level = @level + 1
            --we continue to wait for an object
            set @status = 4
        end

        if @c = '{'
        begin
            set @object_type = 'object'
            --we want now to start the object name
            set @status = 0
            set @pos = @pos - 1
        end
        if @c = '}'
        begin
            --go down 1 level
            set @level = @level - 1
            --what's next
            set @status = 4            
        end
        if @c = 'n'
        begin -- starting null?
			select @parentid  = max(id) from @table where level = @level-1
            if substring(@json, @pos, 4) = 'null'
            begin
                insert into @table ( name , object_type, level, parentid) values (@name, 'null', @level, @parentid	)                 
                set @pos = @pos +3
                set @status = 4
            end
            else
            begin
                insert into @table ( value , object_type, level, parentid) values (substring(@json, @pos, 4), 'expected null', @level, @parentid)                                 
            end
            --what comes next?
            set @status = 4
        end

        if @c = 't'
        begin -- starting null?
			select @parentid  = max(id) from @table where level = @level-1
            if substring(@json, @pos, 4) = 'true'
            begin
                insert into @table ( name , object_type, value, level, parentid) values (@name, 'boolean' , 'true', @level, @parentid)                 
                set @pos = @pos +3
                set @status = 4
            end
            else
            begin
                insert into @table ( value , object_type, level, parentid) values (substring(@json, @pos, 4), 'expected true', @level, @parentid)                                 
            end
            --what comes next?
            set @status = 4
        end

        if @c = 'f'
        begin -- starting null?
			select @parentid  = max(id) from @table where level = @level-1
            if substring(@json, @pos, 5) = 'false'
            begin
                insert into @table ( name , object_type, value, level, parentid	) values (@name, 'boolean' , 'false', @level, @parentid	)                 
                set @pos = @pos +4
                set @status = 4
            end
            else
            begin
                insert into @table ( value , object_type, level,parentid) values (substring(@json, @pos, 5), 'expected false', @level, @parentid)                                 
            end
            --what comes next?
            set @status = 4
        end

        if @c = ','
        begin
          set @name = null
          --maybe object  or pair
          -- are we inside an object or an array;
          select top 1 @status = case when object_type = 'object' then 1 else 4 end   
            from @table where level = @level - 1 order by id desc
          
        end

        if @c like '[0-9-]'
        begin
            set @status = 9
            set @value_start = @pos
            set @object_type ='number'
        end
        
        goto next_loop
      end-- if @status in ( 4 )


      if @status = 9
      begin
        if @c like '[eE.0-9+-]'
        begin
            -- continue waiting
            set @status = 9
        end
        else
        begin
            set @value = substring(@json, @value_start, @pos - @value_start)
			select @parentid  = max(id) from @table where level = @level
            insert into @table ( object_type, value, name, level , parentid ) values ( @object_type, @value , @name, @level, @parentid)
            --get next part
            set @status = 4
            set @pos = @pos -1
        end
        goto next_loop

      end

    --insert into @table ( name ) values (@c)
    next_loop:
    set @pos = @pos + 1
  end -- loop

  return
end

Open in new window

Have a question about something in this article? You can receive help directly from the article author. Sign up for a free trial to get started.