You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

601 lines
24 KiB

5 years ago
--===========================================================================--
-- --
-- System.Text --
-- --
--===========================================================================--
--===========================================================================--
-- Author : kurapica125@outlook.com --
-- URL : http://github.com/kurapica/PLoop --
-- Create Date : 2014/10/05 --
-- Update Date : 2019/10/11 --
-- Version : 1.0.2 --
--===========================================================================--
PLoop(function(_ENV)
-- Names made available to the code of this module.
-- Keyed entries bind a short alias to a standard-library or PLoop function;
-- the trailing positional entries are PLoop types referenced by name below.
export {
type = type,
error = error,
ipairs = ipairs,
max = math.max,
-- `yield` is what the __Iterator__ functions below use to hand out values
yield = coroutine.yield,
tconcat = table.concat,
-- used to test whether a value is a TextReader / Iterable object
istype = Class.IsObjectType,
-- used to validate the settings table against EncodingDefinition
validateValue = Struct.ValidateValue,
RunIterator = Threading.RunIterator,
-- fallback block size used by iterReader's BLOCK strategy when no block is given
DEFAULT_BLOCK = 32768,
Prototype, Namespace, Toolset, Iterable
}
-- Declare the System.Text namespace root and make it the current namespace,
-- so the types declared below are created under System.Text.
__Sealed__() __Final__() interface "System.Text" {}
namespace "System.Text"
-- How iterReader feeds the content of a TextReader to a coder:
--   CHAR   : one reader:Read(1) result at a time
--   LINE   : line by line through reader:ReadLine() (also the fallback branch)
--   ALL    : the whole remaining text through reader:ReadToEnd()
--   BLOCK  : chunks requested with reader:Read(block)
--   READER : used by System.Text.Encoder coders, the reader object itself is
--            passed to the encode/decode function
__Sealed__() __AutoIndex__()
enum "TextReaderStrategy" { "CHAR", "LINE", "ALL", "BLOCK", "READER" }
-- The settings table accepted by System.Text.Encoding and System.Text.Encoder.
__Sealed__()
struct "EncodingDefinition" {
-- both conversion functions are mandatory
{ name = "encode", type = Function, require = true },
{ name = "decode", type = Function, require = true },
-- how a TextReader is consumed, LINE when omitted
{ name = "strategy",type = TextReaderStrategy, default = TextReaderStrategy.LINE },
-- optional chunk size for the BLOCK strategy (iterReader falls back to DEFAULT_BLOCK)
{ name = "block", type = NaturalNumber },
}
--- Iterate over the remaining content of a text reader as (index, text) pairs,
-- where index is the 1-based position of the first character of text.
-- @param reader    the text reader to consume
-- @param strategy  a TextReaderStrategy value; anything other than CHAR, ALL
--                  or BLOCK is handled line by line
-- @param block     the chunk size for the BLOCK strategy, DEFAULT_BLOCK if nil
__Iterator__()
function iterReader(reader, strategy, block)
    if strategy == TextReaderStrategy.CHAR then
        -- one character per step, the index is counted locally
        local nextIndex = reader.Position + 1

        while true do
            local chr = reader:Read(1)
            if not chr then return end

            yield(nextIndex, chr)
            nextIndex = nextIndex + 1
        end
    end

    if strategy == TextReaderStrategy.ALL then
        -- everything that is left, in one piece
        yield(reader.Position + 1, reader:ReadToEnd())
        return
    end

    if strategy == TextReaderStrategy.BLOCK then
        local size = block or DEFAULT_BLOCK

        while true do
            -- the position is sampled before each read so it marks the chunk start
            local start = reader.Position + 1
            local text  = reader:Read(size)
            if not text then return end

            yield(start, text)
        end
    end

    -- Read line, as default
    local start = reader.Position + 1
    local line  = reader:ReadLine()
    if not line then return end

    yield(start, line)

    repeat
        -- sampled before the next ReadLine, used as the index of the
        -- line break that separates the previous line from the next one
        local breakAt = reader.Position

        line = reader:ReadLine()
        if line then
            yield(breakAt, "\n")
            yield(breakAt + 1, line)
        end
    until not line
end
-----------------------------------------------------------------------
-- Encoding --
-----------------------------------------------------------------------
--- Decode the content of a text reader, yielding (index, code) pairs where
-- index is the position in the reader of the decoded character.
-- Iteration stops completely as soon as decode returns no code.
-- @param decode    function(str, index) returning code[, length]
-- @param reader    the text reader to consume
-- @param strategy  the TextReaderStrategy passed on to iterReader
-- @param block     the block size passed on to iterReader
__Iterator__()
function IterReaderDecoding(decode, reader, strategy, block)
    for start, chunk in iterReader(reader, strategy, block) do
        local offset = start - 1
        local cursor = 1
        local size   = #chunk

        while cursor <= size do
            local code, width = decode(chunk, cursor)
            if not code then return end

            yield(offset + cursor, code)

            -- a decoder that reports no length is assumed to consume one char
            if not width then width = 1 end
            cursor = cursor + width
        end
    end
end
--- Encode the values produced by another iterator, yielding (n, encoded)
-- pairs where n counts the produced items starting from 1.
-- @param encode            function(code) returning the encoded text
-- @param iter, arg1, arg2  the iterator triple of a generic for; when the
--                          iterator gives only one value per step, that value
--                          is taken as the code
__Iterator__()
function IterEncoding(encode, iter, arg1, arg2)
    local produced = 0

    for key, value in iter, arg1, arg2 do
        local code = value
        if not code then code = key end

        produced = produced + 1
        yield(produced, encode(code))
    end
end
local encoding

--- Build a character encoding object and save it as a namespace.
-- @param name      the encoding name, "System.Text." is prepended when it
--                  contains no dot
-- @param settings  a validated EncodingDefinition
-- @return the result of Namespace.SaveNamespace for the new prototype
-- Raises an error (reported two callers up) when the name is already used.
local newEncoding = function (name, settings)
    local encode, decode  = settings.encode, settings.decode
    local strategy, block = settings.strategy, settings.block

    if not name:find(".", 1, true) then name = "System.Text." .. name end
    if Namespace.GetNamespace(name) then error("The " .. name .. " is already existed", 3) end

    -- Stateless stepper for `for nextPos, code in Decodes(str)`: decodes the
    -- char at startp and returns the following position with the code
    local function step(str, startp)
        local from      = startp or 1
        local code, len = decode(str, from)
        if not code then return end
        return from + (len or 1), code
    end

    -- Return an iterator to decode the target string or text reader
    local function Decodes(str, startp)
        if type(str) == "string" then
            return step, str, startp
        end

        if istype(str, TextReader) then
            return IterReaderDecoding(decode, str, strategy, block)
        end

        error("Usage: " .. name .. ".Decodes(string|System.Text.TextReader[, start])", 2)
    end

    -- Return an iterator to encode unicode code points from another iterator or list
    local function Encodes(codes, arg1, arg2)
        local kind = type(codes)

        if kind == "table" then
            if istype(codes, Iterable) then
                codes, arg1, arg2 = codes:GetIterator()
            else
                codes, arg1, arg2 = ipairs(codes)
            end
        elseif kind ~= "function" then
            error("Usage: " .. name .. ".Encodes(table|iterator, ...)", 2)
        end

        return IterEncoding(encode, codes, arg1, arg2)
    end

    local methods = {
        -- Encode a unicode code point
        Encode  = encode,
        -- Decode a char based on the index, default 1
        Decode  = decode,
        Decodes = Decodes,
        Encodes = Encodes,
    }

    return Namespace.SaveNamespace(name, Prototype {
        __index     = methods,
        __newindex  = Toolset.readonly,
        __tostring  = Namespace.GetNamespaceName,
        -- getmetatable(coder) gives back `encoding`, which is how the
        -- System.Text.Encoding validators recognize a coder
        __metatable = encoding,
    })
end
do
    local USAGE = 'Usage: System.Text.Encoding "name" { decode = Function, encode = Function[, strategy = TextReaderStrategy] }'

    -- Gives back the value when it is a coder created through
    -- System.Text.Encoding (but not System.Text.Encoding itself), otherwise false
    local function validate(value)
        if getmetatable(value) == encoding and value ~= encoding then
            return value
        end
        return false
    end

    -- The System.Text.Encoding prototype: used as a type for validation and
    -- called as `System.Text.Encoding "name" { ... }` to define a new encoding
    encoding = Prototype (ValidateType, {
        __index     = {
            IsImmutable   = function() return true, true end,
            ValidateValue = function(_, value) return validate(value) end,
            Validate      = validate,
        },
        __newindex  = Toolset.readonly,
        __call      = function(self, name)
            if type(name) ~= "string" then error(USAGE, 2) end

            -- second step of the declaration, receives the settings table
            return function(settings)
                local definition, err = validateValue(EncodingDefinition, settings, true)
                if err or not definition then error(USAGE, 2) end

                -- deliberately not a tail call: newEncoding reports its error
                -- at stack level 3, which relies on this frame being present
                local coder = newEncoding(name, definition)
                return coder
            end
        end,
        __tostring  = Namespace.GetNamespaceName,
    })

    --- Represents a character encoding
    Namespace.SaveNamespace("System.Text.Encoding", encoding)
end
--- Represents the ASCII encoding
-- Declared through the System.Text.Encoding call syntax; since the name has
-- no dot, newEncoding saves it as System.Text.ASCIIEncoding.
System.Text.Encoding "ASCIIEncoding" {
-- code -> single char
encode = string.char,
-- (str, index) -> byte value; no length is returned, so the decoding
-- iterators advance by one char per step
decode = string.byte,
-- text readers are consumed one character at a time
strategy = TextReaderStrategy.CHAR,
}
-----------------------------------------------------------------------
-- Encoder & Decoder --
-----------------------------------------------------------------------
--- Run a coder function over each piece of text read from a reader and
-- yield every value the coder produces.
-- @param encode    the coder function, run through RunIterator for each piece
-- @param reader    the text reader to consume
-- @param strategy  the TextReaderStrategy passed on to iterReader
-- @param block     the block size passed on to iterReader
-- @param ...       extra arguments forwarded to the coder after the text
__Iterator__()
function IterReaderEncoder(encode, reader, strategy, block, ...)
    -- the position reported by iterReader is not needed here
    for _, piece in iterReader(reader, strategy, block) do
        for result in RunIterator(encode, piece, ...) do
            yield(result)
        end
    end
end
local encoder

--- Build an encoder object (Encode/Decode/Encodes/Decodes) and save it as a
-- namespace.
--
-- With the READER strategy the coder functions receive a TextReader (strings
-- are wrapped into a StringReader first). With any other strategy they
-- receive text: a string is passed as is, a TextReader is cut into pieces by
-- IterReaderEncoder according to the strategy.
--
-- @param name      the encoder name, "System.Text." is prepended when it
--                  contains no dot
-- @param settings  a validated EncodingDefinition
-- @return the result of Namespace.SaveNamespace for the new prototype
-- Raises an error (reported two callers up) when the name is already used.
local newEncoder = function (name, settings)
    local encode    = settings.encode
    local decode    = settings.decode
    local strategy  = settings.strategy
    local block     = settings.block

    if not name:find(".", 1, true) then name = "System.Text." .. name end
    if Namespace.GetNamespace(name) then error("The " .. name .. " is already existed", 3) end

    local useReader = strategy == TextReaderStrategy.READER

    -- Write every value produced by the given iterator (passed as the values
    -- of a generic for) into a StringWriter and return the joined string
    local function collect(...)
        local writer = StringWriter()
        writer:Open()
        for res in ... do
            writer:Write(res)
        end
        writer:Close()
        return writer:ToString()
    end

    -- Create the iterator flavoured method (Decodes/Encodes) for a coder.
    -- The usage error is raised directly in the returned function so that
    -- level 2 keeps pointing at its caller.
    local function newIterating(coder, usage)
        if useReader then
            return function(reader, ...)
                if type(reader) == "string" then
                    reader = StringReader(reader)
                end

                if istype(reader, TextReader) then
                    return RunIterator(coder, reader, ...)
                end

                error(usage, 2)
            end
        end

        return function(str, ...)
            if type(str) == "string" then
                return RunIterator(coder, str, ...)
            elseif istype(str, TextReader) then
                return IterReaderEncoder(coder, str, strategy, block, ...)
            end

            error(usage, 2)
        end
    end

    -- Create the string flavoured method (Decode/Encode) for a coder
    local function newConverting(coder, usage)
        if useReader then
            return function(reader, ...)
                if type(reader) == "string" then
                    reader = StringReader(reader)
                end

                if not istype(reader, TextReader) then
                    error(usage, 2)
                end

                return collect(RunIterator(coder, reader, ...))
            end
        end

        return function(str, ...)
            if type(str) == "string" then
                return collect(RunIterator(coder, str, ...))
            elseif istype(str, TextReader) then
                -- was a call to the undefined `IterReader`, which failed with
                -- "attempt to call a nil value" for every TextReader input
                return collect(IterReaderEncoder(coder, str, strategy, block, ...))
            end

            error(usage, 2)
        end
    end

    return Namespace.SaveNamespace(name, Prototype {
        __index     = {
            -- Decode string or string from a reader to an iterator
            Decodes = newIterating (decode, "Usage: " .. name .. ".Decodes(string|System.Text.TextReader)"),

            -- Encode string or string from a reader to an iterator
            Encodes = newIterating (encode, "Usage: " .. name .. ".Encodes(string|System.Text.TextReader)"),

            -- Decode string or string from a reader
            Decode  = newConverting(decode, "Usage: " .. name .. ".Decode(string|System.Text.TextReader)"),

            -- Encode string or string from a reader
            Encode  = newConverting(encode, "Usage: " .. name .. ".Encode(string|System.Text.TextReader)"),
        },
        __newindex  = Toolset.readonly,
        __tostring  = Namespace.GetNamespaceName,
        -- getmetatable(coder) gives back `encoder`, which is how the
        -- System.Text.Encoder validators recognize a coder
        __metatable = encoder,
    })
end
do
    local USAGE = 'Usage: System.Text.Encoder "name" { decode = Function, encode = Function[, strategy = TextReaderStrategy] }'

    -- Gives back the value when it is a coder created through
    -- System.Text.Encoder (but not System.Text.Encoder itself), otherwise false
    local function validate(value)
        if getmetatable(value) == encoder and value ~= encoder then
            return value
        end
        return false
    end

    -- The System.Text.Encoder prototype: used as a type for validation and
    -- called as `System.Text.Encoder "name" { ... }` to define a new encoder
    encoder = Prototype (ValidateType, {
        __index     = {
            IsImmutable   = function() return true, true end,
            ValidateValue = function(_, value) return validate(value) end,
            Validate      = validate,
        },
        __newindex  = Toolset.readonly,
        __call      = function(self, name)
            if type(name) ~= "string" then error(USAGE, 2) end

            -- second step of the declaration, receives the settings table
            return function(settings)
                local definition, err = validateValue(EncodingDefinition, settings, true)
                if err or not definition then error(USAGE, 2) end

                -- deliberately not a tail call: newEncoder reports its error
                -- at stack level 3, which relies on this frame being present
                local coder = newEncoder(name, definition)
                return coder
            end
        end,
        __tostring  = Namespace.GetNamespaceName,
    })

    --- Represents a character encoder
    Namespace.SaveNamespace("System.Text.Encoder", encoder)
end
-----------------------------------------------------------------------
-- Reader & Writer --
-----------------------------------------------------------------------
--- The abstract base of the writers that output a sequential series of characters
__Sealed__()
class "TextWriter" (function (_ENV)
    extend "IAutoClose"

    -----------------------------------------------------------
    --                       property                        --
    -----------------------------------------------------------
    --- The character encoding in which the output is written
    __Abstract__() property "Encoding"  { type = System.Text.Encoding }

    --- The line terminator string used by the writer, "\n" by default
    __Abstract__() property "NewLine"   { type = String, default = "\n" }

    -----------------------------------------------------------
    --                        method                         --
    -----------------------------------------------------------
    --- Clear the buffers of the writer so any buffered data is written to
    -- the underlying device
    __Abstract__()
    function Flush(self)
    end

    --- Write the data to the text string or stream
    __Abstract__()
    function Write(self, data)
    end

    --- Write the data (skipped when nil or false) and then the line terminator
    __Abstract__()
    function WriteLine(self, data)
        if data then
            self:Write(data)
        end

        self:Write(self.NewLine)
    end
end)
--- The abstract base of the readers that consume a sequential series of characters
__Sealed__()
class "TextReader" (function (_ENV)
    extend "IAutoClose"

    -----------------------------------------------------------
    --                       property                        --
    -----------------------------------------------------------
    --- The character encoding in which the input is read
    __Abstract__() property "Encoding"  { type = System.Text.Encoding }

    --- The operation position, readable and writable
    __Abstract__() property "Position"  { type = Number }

    -----------------------------------------------------------
    --                        method                         --
    -----------------------------------------------------------
    --- Read the next character and advance the position by one character
    __Abstract__()
    function Read(self)
    end

    --- Read a line of characters and return it as a string
    __Abstract__()
    function ReadLine(self)
    end

    --- Return the ReadLine method and the reader itself, so the result can
    -- drive a generic for: `for line in reader:ReadLines() do ... end`
    function ReadLines(self)
        return self.ReadLine, self
    end

    --- Read at most count characters, beginning at the given index when it
    -- is specified
    __Abstract__()
    function ReadBlock(self, count, index)
    end

    --- Read everything from the current position to the end as one string
    __Abstract__()
    function ReadToEnd(self)
    end
end)
-----------------------------------------------------------------------
-- String Reader & Writer --
-----------------------------------------------------------------------
--- Represents a writer that collects a sequential series of characters into
-- a string. Written pieces are buffered between Open and Close; Close joins
-- them into the final result returned by ToString / the Result property.
__Sealed__()
class "StringWriter" (function(_ENV)
    inherit "TextWriter"

    export {
        tconcat = table.concat,
        wipe    = Toolset.wipe,
    }

    field {
        temp    = false,    -- the buffer table while opened, false otherwise
        count   = 0,        -- the number of pieces stored in the buffer
    }

    -----------------------------------------------------------
    --                       property                        --
    -----------------------------------------------------------
    --- the final result, stored in self[0] by Close
    property "Result" { set = false, field = 0 }

    -----------------------------------------------------------
    --                        method                         --
    -----------------------------------------------------------
    --- Append a piece of text to the buffer, the writer must be opened
    __Arguments__{ String }
    function Write(self, text)
        local cnt       = self.count + 1
        self.count      = cnt
        self.temp[cnt]  = text
    end

    --- Start a new, empty buffer
    function Open(self)
        self.temp       = {}
        self.count      = 0
    end

    --- Join the buffered pieces into the result and release the buffer.
    -- Closing a writer that is not opened (never opened, or closed already)
    -- does nothing and keeps the previous result, instead of failing inside
    -- table.concat on the `false` buffer marker.
    function Close(self)
        local buffer    = self.temp
        if not buffer then return end

        self[0]         = tconcat(buffer)
        self.temp       = false
        self.count      = 0
    end

    --- Return the result generated by the last Close
    function ToString(self)
        return self.Result
    end

    -----------------------------------------------------------
    --                      meta-method                      --
    -----------------------------------------------------------
    function __tostring(self)
        return self.Result
    end
end)
--- Represents a reader that reads a sequential series of characters from a
-- string. The position is the count of characters already consumed.
__Sealed__()
class "StringReader" (function(_ENV)
    inherit "TextReader"

    export {
        strsub      = string.sub,
        strfind     = string.find,
        strmatch    = string.match,
        floor       = math.floor,
        min         = math.min,
        max         = math.max,
    }

    -----------------------------------------------------------
    --                       property                        --
    -----------------------------------------------------------
    --- Gets or sets the position. A negative number means start from the end
    -- of the content. The value is floored and clamped into [0, length].
    property "Position" {
        type        = Number,
        field       = "__seekpos",
        set         = function(self, pos)
            pos     = floor(pos)

            if pos < 0 then
                self.__seekpos = min(max(0, self.__length + pos), self.__length)
            else
                self.__seekpos = min(pos, self.__length)
            end
        end,
    }

    --- Whether discard the indent at the head of the each line
    property "DiscardIndents" { type = Boolean, default = false }

    -----------------------------------------------------------
    --                        method                         --
    -----------------------------------------------------------
    --- Read characters from the current position and advance past them.
    -- @param count  optional, the max number of characters to read; one
    --               character is read when it is omitted (or less than 1)
    -- @return the characters read, nothing when the end is reached
    function Read(self, count)
        self.__skipindent = false -- only skip indent when keeping read line

        local pos   = self.__seekpos + 1
        if pos <= self.__length then
            -- iterReader passes a count (Read(1), Read(block)); it used to
            -- be ignored so every read gave back a single character
            local last      = min(self.__length, pos + max(1, floor(count or 1)) - 1)
            self.__seekpos  = last
            return strsub(self.__content, pos, last)
        end
    end

    --- Read up to the next "\n" (not included in the result) and advance
    -- past it. When DiscardIndents is on and only ReadLine has been used so
    -- far, the leading white spaces of the first line are remembered and
    -- removed from every line that starts with them.
    -- @return the line, nothing when the end is reached
    function ReadLine(self)
        local pos   = self.__seekpos + 1
        if pos <= self.__length then
            local nxtl, endl = strfind(self.__content, "\n", pos, true)
            local line

            if nxtl then
                self.__seekpos = endl
                line = strsub(self.__content, pos, nxtl - 1)
            else
                self.__seekpos = self.__length
                line = strsub(self.__content, pos)
            end

            -- -1 marks "first line not read yet": capture its indent once
            if self.__skipindent == -1 then
                self.__skipindent = self.DiscardIndents and strmatch(line, "^%s+") or false
            end

            local indent = self.__skipindent
            -- only strip when the line really starts with the indent; a plain
            -- find would also match the indent in the middle of the line and
            -- then cut away the head of an unindented line
            if indent and strsub(line, 1, #indent) == indent then
                line = strsub(line, #indent + 1)
            end

            return line
        end
    end

    --- Read everything left and move the position to the end
    -- @return the remaining text, nothing when the end is reached
    function ReadToEnd(self)
        self.__skipindent = false -- only skip indent when keeping read line

        local pos   = self.__seekpos + 1
        if pos <= self.__length then
            self.__seekpos = self.__length
            return strsub(self.__content, pos)
        end
    end

    --- Read at most count characters
    -- @param count  the max number of characters to read
    -- @param index  optional, assigned to Position before reading
    -- @return the characters read, nothing when the end is reached
    function ReadBlock(self, count, index)
        self.__skipindent = false -- only skip indent when keeping read line

        if index then self.Position = index end

        local pos   = self.__seekpos + 1
        if pos <= self.__length then
            self.__seekpos = min(self.__length, pos + count - 1)
            return strsub(self.__content, pos, self.__seekpos)
        end
    end

    -----------------------------------------------------------
    --                      constructor                      --
    -----------------------------------------------------------
    __Arguments__{ String }
    function __new(_, str)
        return {
            __content       = str,
            __length        = #str,
            __seekpos       = 0,
            __skipindent    = -1,   -- indent of the first line not captured yet
        }, true
    end
end)
end)