הערה
הגישה לדף זה מחייבת הרשאה. באפשרותך לנסות להיכנס או לשנות מדריכי כתובות.
הגישה לדף זה מחייבת הרשאה. באפשרותך לנסות לשנות מדריכי כתובות.
Question
Monday, December 1, 2014 5:54 PM
I am having some issues writing a large set of data to an XML file. I am using the following class to serialize objects to xml and then write them to disk:
''' <summary>
''' Generic helper that serializes instances of <typeparamref name="T"/> to XML text
''' and appends that text to an <see cref="XmlWriter"/>.
''' Borrowed from http://icanmakethiswork.blogspot.ca/2012/11/xsdxml-schema-generator-xsdexe-taking.html
''' </summary>
''' <typeparam name="T">The type of the objects being serialized.</typeparam>
''' <remarks>
''' A single XmlSerializer is created in the shared constructor and reused by every call.
''' </remarks>
Public Class XMLConverter(Of T)

    ' Created once by the shared constructor below and reused afterwards.
    Private Shared serializer As XmlSerializer = Nothing

    ''' <summary>
    ''' Shared constructor: builds the serializer for <typeparamref name="T"/>.
    ''' </summary>
    Shared Sub New()
        serializer = New XmlSerializer(GetType(T))
    End Sub

    ''' <summary>
    ''' Serializes an item, runs the text through XMLCleaner.CleanResult and writes
    ''' the result to the writer as raw markup, then flushes the writer.
    ''' </summary>
    ''' <param name="writer">The writer that receives the raw XML text.</param>
    ''' <param name="itemToAppend">The object to be converted and written.</param>
    Public Shared Sub AppendToXml(writer As XmlWriter, itemToAppend As T)
        Dim cleanedXml As String = XMLCleaner.CleanResult(ToXML(itemToAppend))
        writer.WriteRaw(cleanedXml)
        writer.Flush()
    End Sub

    ''' <summary>
    ''' Serializes the supplied object into a string of XML.
    ''' </summary>
    ''' <param name="obj">The object to serialize.</param>
    ''' <returns>The XML text produced by the serializer.</returns>
    Public Shared Function ToXML(obj As T) As String
        Using buffer As New MemoryStream()
            serializer.Serialize(buffer, obj)
            ' Rewind so the reader sees everything the serializer just wrote.
            buffer.Seek(0, SeekOrigin.Begin)
            Using reader As New StreamReader(buffer)
                Return reader.ReadToEnd()
            End Using
        End Using
    End Function
End Class
''' <summary>
''' Text clean-up applied to serializer output before it is written out.
''' </summary>
Public Class XMLCleaner

    ''' <summary>
    ''' Removes junk from the serialized text and slightly modifies it: strips
    ''' xmlns attributes and the XML declaration, renames SavedSearchRecord to
    ''' Record, and adds one space after every CR/LF.
    ''' </summary>
    ''' <param name="result">The raw XML text to clean.</param>
    ''' <returns>The cleaned XML text.</returns>
    Public Shared Function CleanResult(result As String) As String
        ' Whitespace followed by an xmlns... attribute and its quoted value.
        Dim withoutNamespaces As String = Regex.Replace(result, "\sxmlns.+?"".*?""", "")
        Dim renamed As String = Regex.Replace(withoutNamespaces, "SavedSearchRecord", "Record")
        Dim withoutDeclaration As String = renamed.Replace("<?xml version=""1.0""?>", "")
        ' Indent every following line by one space.
        Return Regex.Replace(withoutDeclaration, vbCrLf, vbCrLf & " ")
    End Function
End Class
And am calling this like so:
' Serialize one record and append its cleaned XML to writer.
XMLConverter(Of SavedSearchRecord).AppendToXml(writer, record)
The issue is that memory is quickly being accumulated as I append new records to the file and ultimately results in an out of memory exception.
I've seen that not caching the serializer can result in this behaviour, but I think I've sidestepped that issue in my implementation. (Please correct me if I am wrong).
After examining a memory dump:
716821b4 28535 10497120 System.String
71682b74 140213 145562968 System.Char[]
71685670 140258 758802112 System.Byte[]
I can see that I have an enormous number of byte & char arrays getting stuck in memory. The data in the arrays leads me to believe that these are being stranded in memory by the call to the serializer (serializer.Serialize(memoryStream, obj)) as they contain the unmodified serialized object strings.
Given that the memory stream is in a Using block, I can't figure out why these byte arrays are not being collected by the GC. I looked briefly into the XMLSerializer class and I think the char arrays are being created within the serializer (it seems to convert the stream to a writer).
Can anyone tell me how to prevent this code from culminating in out of memory exceptions? Or more importantly explain why the serializer isn't releasing memory as it seems like it should?
FYI code is written using .NET 4.0
All replies (9)
Monday, December 1, 2014 7:38 PM ✅Answered
The XmlSerializer leaks memory ... That is a fact.
The reason is that the serializer creates an assembly to perform the serialization, and an assembly loaded into an AppDomain cannot be unloaded.
Two workarounds:
1) Cache your serializer, specifying the RootAttribute, and reuse it. (Only one assembly will leak, once, which is a minimal amount of memory.)
This workaround is the best choice if all the serialized objects are of the same type and the serialized object is not too large.
2) Use it in its own AppDomain (in which case it can be unloaded).
So in your case, workaround #1 is the one to choose. (This is about the same thing that you were doing, except that in your code you didn't have the RootAttribute specified, which was causing a new assembly to be created each time you called the method.)
''' <summary>
''' Serializes instances of <typeparamref name="T"/> to XML text and appends
''' that text to an <see cref="XmlWriter"/>.
''' </summary>
''' <typeparam name="T">The type of the objects being serialized.</typeparam>
Public Class XMLConverter(Of T)

    ' Single shared serializer, built with an explicit root attribute and reused by every call.
    Private Shared serializer As New XmlSerializer(GetType(T), New XmlRootAttribute("rootNode"))

    ''' <summary>
    ''' Serializes an item, runs the text through XMLCleaner.CleanResult and writes
    ''' the result to the writer as raw markup, then flushes the writer.
    ''' </summary>
    ''' <param name="writer">The writer that receives the raw XML text.</param>
    ''' <param name="itemToAppend">The object to be converted and written.</param>
    Public Shared Sub AppendToXml(writer As XmlWriter, itemToAppend As T)
        Dim cleanedXml As String = XMLCleaner.CleanResult(ToXML(itemToAppend))
        writer.WriteRaw(cleanedXml)
        writer.Flush()
    End Sub

    ''' <summary>
    ''' Serializes the supplied object into a string of XML.
    ''' </summary>
    ''' <param name="obj">The object to serialize.</param>
    ''' <returns>The XML text produced by the serializer.</returns>
    Public Shared Function ToXML(obj As T) As String
        Using buffer As New MemoryStream()
            serializer.Serialize(buffer, obj)
            ' Rewind so the reader sees everything the serializer just wrote.
            buffer.Seek(0, SeekOrigin.Begin)
            Using reader As New StreamReader(buffer)
                Return reader.ReadToEnd()
            End Using
        End Using
    End Function
End Class
''' <summary>
''' Text clean-up applied to serializer output before it is written out.
''' </summary>
Public Class XMLCleaner

    ''' <summary>
    ''' Removes junk from the serialized text and slightly modifies it: strips
    ''' xmlns attributes and the XML declaration, renames SavedSearchRecord to
    ''' Record, and adds one space after every CR/LF.
    ''' </summary>
    ''' <param name="result">The raw XML text to clean.</param>
    ''' <returns>The cleaned XML text.</returns>
    Public Shared Function CleanResult(result As String) As String
        ' Whitespace followed by an xmlns... attribute and its quoted value.
        Dim stripped As String = Regex.Replace(result, "\sxmlns.+?"".*?""", "")
        Dim shortened As String = Regex.Replace(stripped, "SavedSearchRecord", "Record")
        Dim fragment As String = shortened.Replace("<?xml version=""1.0""?>", "")
        ' Indent every following line by one space.
        Return Regex.Replace(fragment, vbCrLf, vbCrLf & " ")
    End Function
End Class
Monday, December 1, 2014 10:10 PM ✅Answered
Believe it or not, that won't change anything.
Here a blog about it on MSDN: http://blogs.msdn.com/b/tess/archive/2006/02/15/532804.aspx
The XmlSerializer does create an assembly under the hood to serialize, and once an assembly has been created and loaded, it is impossible to unload it. (Assemblies are not in the GC heap.)
So,
1) Each time you create a new serializer, an assembly is created and loaded into your application domain.
2) Each time an existing, reused serializer needs to serialize something different, it creates and loads an assembly into your application domain. When the RootAttribute is not specified, each time you use the serializer it assumes that what needs to be serialized is different, and it creates and loads a new assembly into your application domain.
All those assemblies that are loaded and never removed use memory, and that memory can never be reclaimed.
Try the code I posted, declaring the serializer as I showed. You will have no visible memory leak. (I say "no visible" because only a single assembly will be created and leaked, which is only a small amount of memory.)
NOTE: the code I showed will stop the large memory leak only if the objects that you are serializing are all of the same type. If they are not, your only option will be to create a new AppDomain, create and use the serializer inside that AppDomain, and finally unload the domain to reclaim the memory.
If you need an example of an implementation that uses an AppDomain, let me know.
Monday, December 1, 2014 9:19 PM
I'm not sure how calling the Serialize method on an existing serializer would cache a new assembly. Each time I call the function, it is on the existing serializer.
According to this post "The default constructors XmlSerializer(type) and XmlSerializer(type, defaultNameSpace) caches the dynamic assembly so if you use those constructors only one copy of the dynamic assembly needs to be created."
So I don't believe the leak in my program is being caused by spinning up additional assemblies for the serializer. The post linked above also notes that using the constructor you post above is actually one of the causes for the assembly issue you mentioned (unless, as you said, you specifically cache it).
I was careful to recover a memory dump and comb through it to make sure that the problem you describe was not the case before I posted this here. I'm fairly sure the memory is being eaten up by thousands of byte and char arrays generated by the serializer and not by spurious assemblies.
Unfortunately I'm not familiar with 'App Domain' and I don't really know what you are referring to.
Monday, December 1, 2014 9:43 PM
TH,
Crazy knows this stuff inside out - he's perhaps the best one here to help diagnose your actual problem, but have you considered writing the XML yourself (in your own method)?
Is it too much data at once or ... too involved or just what?
Still lost in code, just at a little higher level.
:-)
Tuesday, December 2, 2014 3:33 PM
You're absolutely correct. Adding the extra parameter to the constructor did away with all the excess memory, I wish I had tried it the first time around!
I was actually using that post to debug my issue and following the steps therein did not lead to the huge list of Unknown Modules that are mentioned which is why I thought I was having a different problem. Additionally, she specifically notes in her post that the constructor I was originally using was not supposed to exhibit the behavior you described. Thank you for persevering!
I'm still somewhat baffled as to why XMLSerializer would generate multiple assemblies to serialize the same type (when using the same instance), but at least I can finally move on to other things! Thank you again!
If you have a Stack Overflow account, I posted the same question there. If you'd care to copy and paste your first response, I will happily mark it as the answer there as well.
Tuesday, December 2, 2014 4:09 PM
I'm still somewhat baffled as to why XMLSerializer would generate multiple assemblies to serialize the same type (when using the same instance), but at least I can finally move on to other things! Thank you again!
When you don't specify a RootAttribute, the serializer assumes that it may be used to encode different XML schemas. Therefore, it has to implement and compile a different layout engine for each different schema. It has no other choice; it cannot know that you will always use it for the same schema.
When you specify the RootAttribute, it assumes that it will always be used to encode the same schema, and so it reuses the same layout engine. (But by specifying a root node you lock the serializer to a particular schema; it can no longer be used to encode a different schema.)
Tuesday, December 2, 2014 7:03 PM
When You don't specify an RootAttribute, the Serializer assume that it can be used to encode different XML schema.
This seems like a unusual assumption given that the XMLSerializer constructor takes a Type as its first argument. Does that type not provide the schema?
I am admittedly not an expert in serialization, but it seems incredibly non-intuitive for it to work the way that it does. I don't grasp how providing the XmlRootAttribute gives the serializer any more information than the Type does.
Forgive me if I am being obtuse.
Tuesday, December 2, 2014 7:12 PM
The Haunt
I agree with you; this is far from intuitive, and I really can't explain why Microsoft implemented it to work that way.
But the real problem is not how it works; the problem is that it is not documented! I just can't believe that Microsoft didn't realize this problem when it implemented the serializer. They must have known that loading an assembly into the main AppDomain would result in a memory leak, and they should have documented that fact and the possible workarounds.
Tuesday, December 2, 2014 10:39 PM
Just as a complement to the info above:
Here is a quick example of your code using an AppDomain.
This doesn't leak memory. When you call FreeSerializerResources, all memory used by the serializer is released. However, note that as long as you don't call FreeSerializerResources, the memory usage keeps building. Therefore you may run out of memory if you keep reusing the serializer without ever freeing the resources.
Also note that methods that are accessible across domains (meaning methods that can be called from a class that is not part of the AppDomain) cannot be static (Shared).
Note: the XmlWriter is not a serializable object. Therefore it has to be created inside the XMLConverter class, since it cannot pass through the proxy.
Imports System.IO
Imports System.Text.RegularExpressions
Imports System.Xml
Imports System.Xml.Serialization
''' <summary>
''' Demo form: serializes purchase orders through an XMLConverter hosted in a
''' separate AppDomain, then unloads that domain to release its memory.
''' </summary>
Public Class Form1

    ''' <summary>
    ''' Writes 10000 purchase orders to MyPO.xml via a converter living in its own
    ''' AppDomain, then disposes the converter and unloads the domain.
    ''' </summary>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        Dim Domain As AppDomain = Nothing
        Dim Serializer As XMLConverter(Of PurchaseOrder) = Nothing
        Try
            Domain = GetSerializer(Serializer)
            For x = 1 To 10000
                Dim po As New PurchaseOrder
                Serializer.AppendToXml("MyPO.xml", po)
            Next
        Finally
            ' Clean up even when serialization fails part-way, otherwise the
            ' extra AppDomain (and the open file) would stay alive.
            Try
                'Dispose the serializer before freeing the resources
                If Serializer IsNot Nothing Then
                    Serializer.Dispose()
                End If
            Finally
                ' Unload the domain even if Dispose itself throws.
                If Domain IsNot Nothing Then
                    FreeSerializerResources(Domain)
                End If
            End Try
        End Try
    End Sub

    ''' <summary>
    ''' Creates a new AppDomain and instantiates the converter inside it.
    ''' </summary>
    ''' <param name="DomainSerializer">Receives the proxy to the converter created in the new domain.</param>
    ''' <returns>The AppDomain hosting the converter; the caller must unload it.</returns>
    Private Function GetSerializer(ByRef DomainSerializer As XMLConverter(Of PurchaseOrder)) As AppDomain
        Dim domain As AppDomain = AppDomain.CreateDomain("Domain")
        Try
            DomainSerializer = CType(domain.CreateInstanceAndUnwrap(Me.GetType.Assembly.FullName, GetType(XMLConverter(Of PurchaseOrder)).FullName), XMLConverter(Of PurchaseOrder))
        Catch
            ' The caller never sees this domain if creation fails, so unload it here.
            AppDomain.Unload(domain)
            Throw
        End Try
        Return domain
    End Function

    ''' <summary>
    ''' Unloads the given AppDomain and forces a garbage collection.
    ''' </summary>
    ''' <param name="domain">The domain to unload.</param>
    Private Sub FreeSerializerResources(domain As AppDomain)
        AppDomain.Unload(domain)
        GC.Collect()
    End Sub
End Class
'=============================
'This class and all the resources it creates are loaded and executed in the created AppDomain
'=============================
''' <summary>
''' Instance-based converter that serializes objects of type <typeparamref name="T"/>
''' and appends them to an XML file. It derives from MarshalByRefObject so that it can
''' be called through a proxy from another AppDomain.
''' </summary>
''' <typeparam name="T">The type of the objects being serialized.</typeparam>
Public Class XMLConverter(Of T)
    Inherits MarshalByRefObject
    Implements IDisposable

    Private serializer As XmlSerializer = New XmlSerializer(GetType(T))
    Private settings As New XmlWriterSettings()
    ' Created lazily by the first AppendToXml call; Nothing until then.
    Private writer As XmlWriter
    Private disposedValue As Boolean

    ''' <summary>
    ''' Configures the writer settings (indented output, one attribute per line).
    ''' </summary>
    Sub New()
        settings.Indent = True
        settings.NewLineOnAttributes = True
    End Sub

    ''' <summary>
    ''' Serializes an item, cleans the text with XMLCleaner.CleanResult and writes it
    ''' to the file as raw markup, then flushes the writer.
    ''' </summary>
    ''' <param name="path">
    ''' File to write to. Only the first call uses it, to create the writer;
    ''' later calls reuse that writer and ignore this value.
    ''' </param>
    ''' <param name="itemToAppend">The object to be converted and written.</param>
    ''' <exception cref="ObjectDisposedException">The converter has already been disposed.</exception>
    Public Sub AppendToXml(path As String, itemToAppend As T)
        ' Fail clearly instead of writing to a closed writer.
        ' ObjectDisposedException derives from InvalidOperationException, which is
        ' what a closed XmlWriter raised here before.
        If disposedValue Then
            Throw New ObjectDisposedException(Me.GetType().Name)
        End If
        If writer Is Nothing Then
            writer = XmlWriter.Create(path, settings)
        End If
        writer.WriteRaw(XMLCleaner.CleanResult(ToXML(itemToAppend)))
        writer.Flush()
    End Sub

    ''' <summary>
    ''' Serializes the supplied object into a string of XML.
    ''' </summary>
    ''' <param name="obj">The object to serialize.</param>
    ''' <returns>The XML text produced by the serializer.</returns>
    Public Function ToXML(obj As T) As String
        Using memoryStream As New MemoryStream()
            serializer.Serialize(memoryStream, obj)
            ' Rewind so the reader sees everything the serializer just wrote.
            memoryStream.Position = 0
            Using sr As New StreamReader(memoryStream)
                Return sr.ReadToEnd()
            End Using
        End Using
    End Function

    ''' <summary>
    ''' Closes the XML writer, if one was created.
    ''' </summary>
    ''' <param name="disposing">True when called from Dispose().</param>
    Protected Overridable Sub Dispose(disposing As Boolean)
        If Not Me.disposedValue Then
            ' writer stays Nothing when AppendToXml was never called; closing it
            ' unconditionally raised a NullReferenceException.
            If disposing AndAlso writer IsNot Nothing Then
                writer.Close()
            End If
        End If
        Me.disposedValue = True
    End Sub

    ''' <summary>
    ''' Releases the writer held by this converter.
    ''' </summary>
    Public Sub Dispose() Implements IDisposable.Dispose
        Dispose(True)
        GC.SuppressFinalize(Me)
    End Sub
End Class
''' <summary>
''' Text clean-up applied to serializer output before it is written out.
''' </summary>
Public Class XMLCleaner

    ''' <summary>
    ''' Removes junk from the serialized text and slightly modifies it: strips
    ''' xmlns attributes and the XML declaration, renames SavedSearchRecord to
    ''' Record, and adds one space after every CR/LF.
    ''' </summary>
    ''' <param name="result">The raw XML text to clean.</param>
    ''' <returns>The cleaned XML text.</returns>
    Public Shared Function CleanResult(result As String) As String
        ' Whitespace followed by an xmlns... attribute and its quoted value.
        Dim noNamespaces As String = Regex.Replace(result, "\sxmlns.+?"".*?""", "")
        Dim recordNamed As String = Regex.Replace(noNamespaces, "SavedSearchRecord", "Record")
        Dim noDeclaration As String = recordNamed.Replace("<?xml version=""1.0""?>", "")
        ' Indent every following line by one space.
        Return Regex.Replace(noDeclaration, vbCrLf, vbCrLf & " ")
    End Function
End Class
''' <summary>
''' Sample data class used by the serialization example. It is serialized
''' with the root element name "PurchaseOrder".
''' </summary>
<Serializable, XmlRoot("PurchaseOrder")>
Public Class PurchaseOrder
    ' Declaration order is kept as-is because it determines element order in the XML.
    Public ShipTo As String
    Public OrderDate As String
    Public OrderedItems As String()
    Public SubTotal As Decimal
    Public ShipCost As Decimal
    Public TotalCost As Decimal
End Class