Skip to content

Commit

Permalink
More work on FasterXML#405, token stream now includes text segments
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder authored and alex-bel-apica committed Sep 4, 2020
1 parent ab5bc49 commit b31bad6
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ public JsonToken nextToken() throws IOException
StaxUtil.throwAsParseException(e, this);
}
if (_parsingContext.inArray()) {
if (_isEmpty(_currText)) {
if (XmlTokenStream._allWs(_currText)) {
// 06-Jan-2015, tatu: as per [dataformat-xml#180], need to
// expose as empty Object, not null (or, worse, as used to
// be done, by swallowing the token)
Expand All @@ -622,7 +622,7 @@ public JsonToken nextToken() throws IOException
// but... [dataformat-xml#191]: looks like we can't short-cut, must
// loop over again
if (_parsingContext.inObject()) {
if ((_currToken != JsonToken.FIELD_NAME) && _isEmpty(_currText)) {
if ((_currToken != JsonToken.FIELD_NAME) && XmlTokenStream._allWs(_currText)) {
try {
token = _xmlTokens.next();
} catch (XMLStreamException e) {
Expand Down Expand Up @@ -1056,19 +1056,6 @@ protected ByteArrayBuilder _getByteArrayBuilder()
return _byteArrayBuilder;
}

/**
 * Helper method that checks whether given String lacks any "visible" content:
 * that is, whether it is {@code null}, zero-length, or made up solely of
 * characters whose code is at or below that of the space character (' ').
 *
 * @param str String to check; may be {@code null}
 *
 * @return {@code true} if no character above ' ' was found; {@code false} otherwise
 */
protected boolean _isEmpty(String str)
{
    if (str == null) {
        return true;
    }
    // Scan direction does not matter for result: any single character above
    // space is enough to consider the String non-empty
    int ix = str.length();
    while (--ix >= 0) {
        if (str.charAt(ix) > ' ') {
            return false;
        }
    }
    return true;
}

/**
 * Helper method called to report a token value that was not recognized:
 * never returns normally.
 *<p>
 * NOTE(review): generic return type is presumably there so that callers can
 * use the call as operand of {@code return} regardless of expected type -- confirm
 * against call sites.
 *
 * @param token Unrecognized token value; its String representation is appended
 *    to the exception message
 *
 * @return Nothing, as an exception is always thrown
 *
 * @throws IllegalStateException always
 */
private <T> T _internalErrorUnknownToken(Object token) {
    final String msg = "Internal error: unrecognized XmlTokenStream token: "+token;
    throw new IllegalStateException(msg);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ public class XmlTokenStream
protected boolean _xsiNilFound;

/**
* If true we have a START_ELEMENT with mixed text
* Flag set true if current event is {@code XML_TEXT} and the text is to be followed by a START_ELEMENT
*
* @since 2.8
* @since 2.12
*/
protected boolean _mixedText;
protected boolean _startElementAfterText;

/**
* Index of the next attribute of the current START_ELEMENT
Expand Down Expand Up @@ -384,30 +384,30 @@ private final int _next() throws XMLStreamException
final boolean startElementNext = _xmlReader.getEventType() == XMLStreamReader.START_ELEMENT;
// If we have no/all-whitespace text followed by START_ELEMENT, ignore text
if (startElementNext) {
if (text == null || _allWs(text)) {
_mixedText = false;
if (_allWs(text)) {
_startElementAfterText = false;
return _initStartElement();
}
_mixedText = true;
_startElementAfterText = true;
_textValue = text;
return (_currentState = XML_TEXT);
}
// For END_ELEMENT we will return text, if any
if (text != null) {
_mixedText = false;
_startElementAfterText = false;
_textValue = text;
return (_currentState = XML_TEXT);
}
_mixedText = false;
_startElementAfterText = false;
return _handleEndElement();

case XML_ATTRIBUTE_NAME:
// if we just returned name, will need to just send value next
return (_currentState = XML_ATTRIBUTE_VALUE);
case XML_TEXT:
// mixed text with other elements
if (_mixedText) {
_mixedText = false;
if (_startElementAfterText) {
_startElementAfterText = false;
return _initStartElement();
}
// text followed by END_ELEMENT
Expand All @@ -422,8 +422,19 @@ private final int _next() throws XMLStreamException
case XMLStreamConstants.END_DOCUMENT:
return (_currentState = XML_END);
case XMLStreamConstants.END_ELEMENT:
// 24-May-2020, tatu: Need to see if we have "mixed content" to offer
if (!_allWs(_textValue)) {
// _textValue already set
return (_currentState = XML_TEXT);
}
return _handleEndElement();
}
// 24-May-2020, tatu: Need to see if we have "mixed content" to offer
if (!_allWs(_textValue)) {
// _textValue already set
_startElementAfterText = true;
return (_currentState = XML_TEXT);
}

// START_ELEMENT...
return _initStartElement();
Expand Down Expand Up @@ -697,7 +708,7 @@ private JsonLocation _extractLocation(XMLStreamLocation2 location)
location.getColumnNumber());
}

protected boolean _allWs(String str)
protected static boolean _allWs(String str)
{
final int len = (str == null) ? 0 : str.length();
if (len > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,15 @@ public void testXmlAttributes() throws Exception
xp.close();
}

// Verifies handling of mixed content (text segments interleaved with child
// elements): as per expected output, each text segment is to be exposed
// as a property with empty String ("") as its name, in document order.
public void testMixedContent() throws Exception
{
    // NOTE(review): a2q() presumably replaces apostrophes with double quotes -- confirm
    final String expectedJson = a2q("{'':'first','a':'123','':'second','b':'456','':'last'}");
    final String xml = "<root>first<a>123</a>second<b>456</b>last</root>";

    final String actualJson = _readXmlWriteJson(xml);

    assertEquals(expectedJson, actualJson);
}

/*
/**********************************************************
/* Helper methods
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ public void testNested() throws Exception
}

// For [dataformat-xml#402]
/* public void testMixedContent() throws Exception
public void testMixedContentBetween() throws Exception
{
String XML = "<root>first<a>123</a> and second <b>abc</b>last &amp; final</root>";
String XML = "<root>first<a>123</a> and second <b>abc</b>\n</root>";
XMLStreamReader sr = _staxInputFactory.createXMLStreamReader(new StringReader(XML));
sr.nextTag();
XmlTokenStream tokens = new XmlTokenStream(sr, XML, FromXmlParser.Feature.collectDefaults());
Expand All @@ -157,12 +157,36 @@ public void testNested() throws Exception
assertEquals("abc", tokens.getText());
assertEquals(XmlTokenStream.XML_END_ELEMENT, tokens.next());

assertEquals(XmlTokenStream.XML_END_ELEMENT, tokens.next());
assertEquals(XmlTokenStream.XML_END, tokens.next());
sr.close();
}

// For [dataformat-xml#402]
// Verifies token sequence for mixed content where non-whitespace text both
// precedes and trails a child element (and trailing text contains an entity).
public void testMixedContentAfter() throws Exception
{
    final String doc = "<root>first<a>123</a>last &amp; final</root>";
    XMLStreamReader reader = _staxInputFactory.createXMLStreamReader(new StringReader(doc));
    // Token stream is constructed with reader already positioned at root element
    reader.nextTag();
    XmlTokenStream stream = new XmlTokenStream(reader, doc, FromXmlParser.Feature.collectDefaults());

    // <root>
    assertEquals(XmlTokenStream.XML_START_ELEMENT, stream.getCurrentToken());
    assertEquals("root", stream.getLocalName());

    // leading text segment, before first child element
    assertEquals(XmlTokenStream.XML_TEXT, stream.next());
    assertEquals("first", stream.getText());

    // <a>123</a>
    assertEquals(XmlTokenStream.XML_START_ELEMENT, stream.next());
    assertEquals("a", stream.getLocalName());
    assertEquals(XmlTokenStream.XML_TEXT, stream.next());
    assertEquals("123", stream.getText());
    assertEquals(XmlTokenStream.XML_END_ELEMENT, stream.next());

    // trailing text segment, with "&amp;" expected to be exposed as '&'
    assertEquals(XmlTokenStream.XML_TEXT, stream.next());
    assertEquals("last & final", stream.getText());

    // </root>, then end of input
    assertEquals(XmlTokenStream.XML_END_ELEMENT, stream.next());
    assertEquals(XmlTokenStream.XML_END, stream.next());
    reader.close();
}
*/
}

0 comments on commit b31bad6

Please sign in to comment.