cd_morris
asked on
Parse a cell with sentences
How can I parse a cell in column B by Sentences.
example in Cell B2 I have the the following:
1. This is a test.
2. This is the other test.
What I want is to have one cell with:
1. This is a test.
And in the cell below:
2. This is the other test.
example in Cell B2 I have the the following:
1. This is a test.
2. This is the other test.
What I want is to have one cell with:
1. This is a test.
And in the cell below:
2. This is the other test.
If you sort on that cloumn, you should get what you want.
BTW: are the cell's numbered as in your example?
Hello,
if the two sentences are in one cell and they look like in your example, then there is probably a line feed character between the sentences, so the second sentence appears on a new line.
you could use a formula along these lines
=LEFT(B2,FIND(CHAR(10),B2) -1) -- to get the first sentence
=MID(B2,FIND(CHAR(10),B2)+ 1,99) -- to get the second sentence
cheers, teylyn
if the two sentences are in one cell and they look like in your example, then there is probably a line feed character between the sentences, so the second sentence appears on a new line.
you could use a formula along these lines
=LEFT(B2,FIND(CHAR(10),B2)
=MID(B2,FIND(CHAR(10),B2)+
cheers, teylyn
Another possibility is to use the Text to Columns dialog with the Delimited option. Tick Other and enter
Alt-0010
as the delimiter character and click Finish. You can then copy the resulting cells and paste special/transpose them to arrange them underneath each other.
With the text to columns option, you will manipulate the source data. With the formula option as above, the source data will remain unchanged, but the formula option does not work well for larger data sets.
Alt-0010
as the delimiter character and click Finish. You can then copy the resulting cells and paste special/transpose them to arrange them underneath each other.
With the text to columns option, you will manipulate the source data. With the formula option as above, the source data will remain unchanged, but the formula option does not work well for larger data sets.
ASKER
jocasio123: Sorting will not split the sentences in to different cell.
teylyn: I have a long column to do this to so a vba solution would perhaps work best.
example.xlsx
teylyn: I have a long column to do this to so a vba solution would perhaps work best.
example.xlsx
What do you mean with "long column"? Many cells with two sentences? A cell with many sentences?
Please provide a data sample that is more representative of your issue.
cheers, teylyn
Please provide a data sample that is more representative of your issue.
cheers, teylyn
ASKER
This code works but it splits after the first period:
Sub SplitBySentence()
' splits a paragraph into multiple rows by sentence
Dim r As Range
Dim iR As Long
Dim nR As Long
Dim asSent() As String
Dim nSent As Integer
Dim iSent As Integer
Dim sCAR As String
Dim sCDR As String
Dim iPrd As Integer ' start of ". " in string
Dim iCln As Integer ' start of ": " in string
If Selection.Areas.Count > 2 Then
MsgBox "Cannot do this to a multi-area selection."
Exit Sub
ElseIf Selection.Columns.Count > 2 Then
MsgBox "Cannot do this on a multi-column selection."
Exit Sub
End If
' doing this for multiple cells would louse up the selection _
range unless you start from the bottom and worked upward _
hence the (1,1) below. Fix later if worthwhile.
Set r = Intersect(Selection, ActiveSheet.UsedRange)
nR = r.Rows.Count
r.Select
For iR = nR To 1 Step -1
With r(iR, 1)
.Select
If Not .HasFormula And Not IsNumeric(.Value) Then
' replace non-breaking space with space
sCDR = Replace(.Value, Chr(160), " ")
' trim and add a trailing blank:
sCDR = Application.Trim(sCDR) & " "
If sCDR <> " " Then
.ClearContents
nSent = 0
' parse into substrings ending in ". " or ": "
Do
iPrd = InStr(1, sCDR, ". ")
iCln = InStr(1, sCDR, ": ")
If iCln > 0 And iCln < iPrd Then iPrd = iCln
sCAR = Left(sCDR, iPrd)
If sCAR = "" And sCDR <> "" Then
sCAR = sCDR
sCDR = ""
End If
If sCAR = "" Then Exit Do
'Debug.Print "sCAR = " & sCAR
nSent = nSent + 1
ReDim Preserve asSent(1 To nSent)
asSent(nSent) = sCAR
sCDR = Mid(sCDR, iPrd + 2) ' one past the space
'Debug.Print "sCDR = " & sCDR
Loop
.Value = asSent(1)
For iSent = nSent To 2 Step -1
.Offset(1).EntireRow.Inser t
.Offset(1).Value = asSent(iSent)
Next
End If
End If
End With
Next
End Sub
Sub SplitBySentence()
' splits a paragraph into multiple rows by sentence
Dim r As Range
Dim iR As Long
Dim nR As Long
Dim asSent() As String
Dim nSent As Integer
Dim iSent As Integer
Dim sCAR As String
Dim sCDR As String
Dim iPrd As Integer ' start of ". " in string
Dim iCln As Integer ' start of ": " in string
If Selection.Areas.Count > 2 Then
MsgBox "Cannot do this to a multi-area selection."
Exit Sub
ElseIf Selection.Columns.Count > 2 Then
MsgBox "Cannot do this on a multi-column selection."
Exit Sub
End If
' doing this for multiple cells would louse up the selection _
range unless you start from the bottom and worked upward _
hence the (1,1) below. Fix later if worthwhile.
Set r = Intersect(Selection, ActiveSheet.UsedRange)
nR = r.Rows.Count
r.Select
For iR = nR To 1 Step -1
With r(iR, 1)
.Select
If Not .HasFormula And Not IsNumeric(.Value) Then
' replace non-breaking space with space
sCDR = Replace(.Value, Chr(160), " ")
' trim and add a trailing blank:
sCDR = Application.Trim(sCDR) & " "
If sCDR <> " " Then
.ClearContents
nSent = 0
' parse into substrings ending in ". " or ": "
Do
iPrd = InStr(1, sCDR, ". ")
iCln = InStr(1, sCDR, ": ")
If iCln > 0 And iCln < iPrd Then iPrd = iCln
sCAR = Left(sCDR, iPrd)
If sCAR = "" And sCDR <> "" Then
sCAR = sCDR
sCDR = ""
End If
If sCAR = "" Then Exit Do
'Debug.Print "sCAR = " & sCAR
nSent = nSent + 1
ReDim Preserve asSent(1 To nSent)
asSent(nSent) = sCAR
sCDR = Mid(sCDR, iPrd + 2) ' one past the space
'Debug.Print "sCDR = " & sCDR
Loop
.Value = asSent(1)
For iSent = nSent To 2 Step -1
.Offset(1).EntireRow.Inser
.Offset(1).Value = asSent(iSent)
Next
End If
End If
End With
Next
End Sub
What separates the two in B2? Is it a carriage return line feed?
Something like this would work, but you have to replace the vbCrLf with the delimiter you are using:
Sub SplitCells()
Dim SeparateCell As Variant
Dim iIndex As Integer
SeparateCell = Split(Range("B2").Value, vbCrLf)
For iIndex = LBound(SepCell) To UBound(SeparateCell)
Cells(intIndex + 1, 3).Value = SeparateCell(iIndex)
Next
End Sub
This would put the results in column 3 (C) and start at C1
Sub SplitCells()
Dim SeparateCell As Variant
Dim iIndex As Integer
SeparateCell = Split(Range("B2").Value, vbCrLf)
For iIndex = LBound(SepCell) To UBound(SeparateCell)
Cells(intIndex + 1, 3).Value = SeparateCell(iIndex)
Next
End Sub
This would put the results in column 3 (C) and start at C1
BTW I had a typo in that:
Sub SplitCells()
Dim SeparateCell As Variant
Dim iIndex As Integer
SeparateCell = Split(Range("B2").Value, vbCrLf)
For iIndex = LBound(SeparateCell) To UBound(SeparateCell)
Cells(iIndex + 1, 3).Value = SeparateCell(iIndex)
Next
End Sub
Sub SplitCells()
Dim SeparateCell As Variant
Dim iIndex As Integer
SeparateCell = Split(Range("B2").Value, vbCrLf)
For iIndex = LBound(SeparateCell) To UBound(SeparateCell)
Cells(iIndex + 1, 3).Value = SeparateCell(iIndex)
Next
End Sub
You could replace the vbCrLf with Chr(10)
This may work for your purpose.
Good luck!
This may work for your purpose.
Good luck!
ASKER
This does not work!
Did you try:
Sub SplitCells()
Dim SeparateCell As Variant
Dim iIndex As Integer
SeparateCell = Split(Range("B2").Value, Chr(10))
For iIndex = LBound(SeparateCell) To UBound(SeparateCell)
Cells(iIndex + 1, 3).Value = SeparateCell(iIndex)
Next
End Sub
What happens when you run it?
Sub SplitCells()
Dim SeparateCell As Variant
Dim iIndex As Integer
SeparateCell = Split(Range("B2").Value, Chr(10))
For iIndex = LBound(SeparateCell) To UBound(SeparateCell)
Cells(iIndex + 1, 3).Value = SeparateCell(iIndex)
Next
End Sub
What happens when you run it?
ASKER
The code I provided works but how do I get it to split after the second period (.)?
BTW I ran it against what you sent and it works perfectly. I'm a bit perplexed :-\
ASKER
Sorry, Wrong cell selected. But how do get it to work for multiply cell in the B range?
I developed a solution using Regular Expressions. For more on Regular Expressions, here's a very good overview by the author of the enhanced functions used in this solution, matthewspatrick: http:/A_1336.html.
This appears to be a very simple solution, where we're looking for "[0-9]." which means a number followed by a period. The application attached (and see code below) parses the subject string, by using the function RegExpFind command, searching for the n-th occurrance of a number followed by a period. Once found, it delivers a mid() function on the original string from one instance to the next. Also a Clean/Trim is performed basically to remove excess spaces and most hidden characters, but for sure the carriage return line feed, which we don't need upon parsing.
See code:
See attached, which provides two demo's. The one at the top is a button you can push to parse anything in "B2" as requested, replacing that and successive rows down, for as many "sentences" (defined as number followed by period to the next number/period combination). The one in the middle/bottom works by selecting first on a string to parse and pushing the button - with results pasted columns to the right (example, only).
If the carriage return linefeed is the only determining factor, the RegExp search string is much simpler, but the code is similar - this time looking for the line feed as the END of the sentence. See second button below the first that only parses based on CRLF. See code, below:
See attached demo worksheet & enjoy!
Dave
Breaking-sentences-using-RegExp-.xls
This appears to be a very simple solution, where we're looking for "[0-9]." which means a number followed by a period. The application attached (and see code below) parses the subject string, by using the function RegExpFind command, searching for the n-th occurrance of a number followed by a period. Once found, it delivers a mid() function on the original string from one instance to the next. Also a Clean/Trim is performed basically to remove excess spaces and most hidden characters, but for sure the carriage return line feed, which we don't need upon parsing.
See code:
Sub BreakLinesDownReplacing()
Dim i As Integer, cPosition As Variant, iPos As Integer
Dim lastStart As Integer
Dim outCursor As Range
Dim stringParse As String
stringParse = Range("B2").Value
Set outCursor = Range("B2")
lastStart = 1 'beginning of the string, for starters
i = 2 'start with second instance position
Do
cPosition = RegExpFind(stringParse, "[1-9].", i, False, 1)
If cPosition = "" Then
iPos = 0
Else
iPos = cPosition
outCursor.Value = Application.WorksheetFunction.Clean(Trim(Mid(stringParse, lastStart, iPos - lastStart)))
lastStart = iPos
Set outCursor = outCursor.Offset(1, 0) 'move on to the right
End If
i = i + 1
Loop Until iPos = 0
'Then, wrap-up, if any leftovers...
outCursor.Value = Application.WorksheetFunction.Clean(Trim(Mid(stringParse, lastStart, Len(stringParse) - lastStart)))
End Sub
See attached, which provides two demo's. The one at the top is a button you can push to parse anything in "B2" as requested, replacing that and successive rows down, for as many "sentences" (defined as number followed by period to the next number/period combination). The one in the middle/bottom works by selecting first on a string to parse and pushing the button - with results pasted columns to the right (example, only).
If the carriage return linefeed is the only determining factor, the RegExp search string is much simpler, but the code is similar - this time looking for the line feed as the END of the sentence. See second button below the first that only parses based on CRLF. See code, below:
Sub BreakLinesDownReplacing_V2()
Dim i As Integer, cPosition As Variant, iPos As Integer
Dim lastStart As Integer
Dim outCursor As Range
Dim stringParse As String
stringParse = Range("B2").Value
Set outCursor = Range("B2")
lastStart = 1 'beginning of the string, for starters
i = 1 'start with second instance position
Do
cPosition = RegExpFind(stringParse, "\n", i, False, 1)
If cPosition = "" Then
iPos = 0
Else
iPos = cPosition
outCursor.Value = Application.WorksheetFunction.Clean(Trim(Mid(stringParse, lastStart, iPos - lastStart)))
lastStart = iPos
Set outCursor = outCursor.Offset(1, 0) 'move on to the right
End If
i = i + 1
Loop Until iPos = 0
'Then, wrap-up, if any leftovers...
outCursor.Value = Application.WorksheetFunction.Clean(Trim(Mid(stringParse, lastStart, Len(stringParse) - lastStart + 1)))
End Sub
See attached demo worksheet & enjoy!
Dave
Breaking-sentences-using-RegExp-.xls
>>only parses based on CRLF
should read, "only parses based on LF - linefeed"
Dave
should read, "only parses based on LF - linefeed"
Dave
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
From the sample code posted by cd_morris, it appears that the delimiters are the period and colon. If so, then the following macro will parse the text in each cell in a column, adding rows as necessary.
Brad
Sub Splitter()
Dim cel As Range, rg As Range
Dim bFound As Boolean
Dim s As String
Dim i As Long, iStart As Long, j As Long, n As Long, nCol As Long, nn As Long
Dim v As Variant, vSplit As Variant
Application.ScreenUpdating = False
Set rg = Range("B2") 'First cell to be split
nCol = rg.Column
Set rg = Range(rg, Cells(Rows.Count, nCol).End(xlUp)) 'All the data in that column
iStart = rg.Row
n = iStart + rg.Rows.Count - 1 'Last row in data
rg.Replace ".", ".|" 'Use a pipe character as a delimiter. Put one after every period and colon.
rg.Replace ":", ":|"
For i = n To iStart Step -1
Set vSplit = Nothing
s = Cells(i, nCol).Value
vSplit = Split(s & "|", "|")
nn = UBound(vSplit)
bFound = False
For j = nn To 0 Step -1
If Left(vSplit(j), 1) = vbLf Then vSplit(j) = Mid(vSplit(j), 2)
If vSplit(j) <> "" Then
Cells(i + 1, nCol).EntireRow.Insert
Cells(i + 1, nCol).Value = vSplit(j)
bFound = True
End If
Next
If bFound Then Rows(i).EntireRow.Delete
Next
Application.ScreenUpdating = True
End Sub
Brad