Expose u32 source offsets as usize

Computations on source offsets should be done in usize, so exposing them as usize makes call sites more convenient.
Once the FFI is gone, we can make SourceRange's fields private to enforce this more strictly.
Johannes Altmanninger 2023-04-18 16:57:17 +02:00
parent 2ca27d2c5b
commit fc5e97e55e
4 changed files with 84 additions and 28 deletions
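
The whole diff applies one pattern: offsets stay u32 in storage, and every public accessor converts to or from usize at the boundary. A minimal standalone sketch of that pattern (hypothetical `Range` type, not the actual fish structs):

    use std::convert::TryInto;

    // Storage stays u32 to keep the structs small; callers only see usize.
    #[derive(Copy, Clone)]
    struct Range {
        start: u32,
        length: u32,
    }

    impl Range {
        fn start(&self) -> usize {
            // u32 -> usize is lossless on 32- and 64-bit targets; try_into
            // still documents the boundary crossing.
            self.start.try_into().unwrap()
        }
        fn set_start(&mut self, value: usize) {
            // usize -> u32 can overflow: panic rather than silently truncate.
            self.start = value.try_into().unwrap();
        }
        fn end(&self) -> usize {
            // Checked add in u32 first; overflow here indicates a caller bug.
            self.start.checked_add(self.length).expect("overflow") as usize
        }
    }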

fish-rust/src/ast.rs

@@ -141,8 +141,7 @@ pub trait Node: Acceptor + ConcreteNode + std::fmt::Debug {
     /// \return the source code for this node, or none if unsourced.
     fn try_source<'s>(&self, orig: &'s wstr) -> Option<&'s wstr> {
-        self.try_source_range()
-            .map(|r| &orig[r.start as usize..r.end() as usize])
+        self.try_source_range().map(|r| &orig[r.start()..r.end()])
     }

     /// \return the source code for this node, or an empty string if unsourced.
@@ -2506,16 +2505,16 @@ impl<'a> TokenStream<'a> {
         result.may_be_variable_assignment = variable_assignment_equals_pos(text).is_some();
         result.tok_error = token.error;

-        assert!(token.offset < SOURCE_OFFSET_INVALID);
-        result.source_start = token.offset;
-        result.source_length = token.length;
+        assert!(token.offset() < SOURCE_OFFSET_INVALID);
+        result.set_source_start(token.offset());
+        result.set_source_length(token.length());

         if token.error != TokenizerError::none {
-            let subtoken_offset = token.error_offset_within_token;
+            let subtoken_offset = token.error_offset_within_token();
             // Skip invalid tokens that have a zero length, especially if they are at EOF.
-            if subtoken_offset < result.source_length {
-                result.source_start += subtoken_offset;
-                result.source_length = token.error_length;
+            if subtoken_offset < result.source_length() {
+                result.set_source_start(result.source_start() + subtoken_offset);
+                result.set_source_length(token.error_length());
             }
         }
@@ -2584,7 +2583,7 @@ macro_rules! parse_error_range {
         FLOG!(ast_construction, "%*sparse error - begin unwinding", $self.spaces(), "");
         // TODO: can store this conditionally dependent on flags.
-        if $range.start != SOURCE_OFFSET_INVALID {
+        if $range.start() != SOURCE_OFFSET_INVALID {
             $self.errors.push($range);
         }
@@ -2592,8 +2591,8 @@
             let mut err = ParseError::default();
             err.text = text.unwrap();
             err.code = $code;
-            err.source_start = $range.start as usize;
-            err.source_length = $range.length as usize;
+            err.source_start = $range.start();
+            err.source_length = $range.length();
             errors.0.push(err);
         }
     }
@@ -3384,8 +3383,8 @@ impl<'s> Populator<'s> {
                 "%*schomping range %u-%u",
                 self.spaces(),
                 "",
-                tok.source_start,
-                tok.source_length
+                tok.source_start(),
+                tok.source_length()
             );
         }
         FLOG!(ast_construction, "%*sdone unwinding", self.spaces(), "");
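
The first hunk above shows the payoff at a call site: slice indexing wants usize, so the `as usize` casts disappear once the getters return usize. Reusing the hypothetical `Range` from the sketch above, with &str standing in for fish's wstr:

    // No casts at the call site: start()/end() already return usize.
    fn source_of<'s>(orig: &'s str, r: Range) -> &'s str {
        &orig[r.start()..r.end()]
    }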

fish-rust/src/parse_constants.rs

@@ -12,7 +12,7 @@ use widestring_suffix::widestrs;
 pub type SourceOffset = u32;

-pub const SOURCE_OFFSET_INVALID: SourceOffset = SourceOffset::MAX;
+pub const SOURCE_OFFSET_INVALID: usize = SourceOffset::MAX as _;
 pub const SOURCE_LOCATION_UNKNOWN: usize = usize::MAX;

 #[derive(Copy, Clone)]
@@ -85,8 +85,10 @@ mod parse_constants_ffi {
     }

     extern "Rust" {
-        fn end(self: &SourceRange) -> u32;
-        fn contains_inclusive(self: &SourceRange, loc: u32) -> bool;
+        #[cxx_name = "end"]
+        fn end_ffi(self: &SourceRange) -> u32;
+        #[cxx_name = "contains_inclusive"]
+        fn contains_inclusive_ffi(self: &SourceRange, loc: u32) -> bool;
     }

     /// IMPORTANT: If the following enum table is modified you must also update token_type_description below.
@@ -245,15 +247,34 @@ pub use parse_constants_ffi::{
 };

 impl SourceRange {
-    pub fn new(start: SourceOffset, length: SourceOffset) -> Self {
-        SourceRange { start, length }
+    pub fn new(start: usize, length: usize) -> Self {
+        SourceRange {
+            start: start.try_into().unwrap(),
+            length: length.try_into().unwrap(),
+        }
     }
-    pub fn end(&self) -> SourceOffset {
+    pub fn start(&self) -> usize {
+        self.start.try_into().unwrap()
+    }
+    pub fn length(&self) -> usize {
+        self.length.try_into().unwrap()
+    }
+    pub fn end(&self) -> usize {
+        self.start
+            .checked_add(self.length)
+            .expect("Overflow")
+            .try_into()
+            .unwrap()
+    }
+    fn end_ffi(&self) -> u32 {
         self.start.checked_add(self.length).expect("Overflow")
     }
     // \return true if a location is in this range, including one-past-the-end.
-    pub fn contains_inclusive(&self, loc: SourceOffset) -> bool {
+    pub fn contains_inclusive(&self, loc: usize) -> bool {
+        self.start() <= loc && loc - self.start() <= self.length()
+    }
+    fn contains_inclusive_ffi(&self, loc: u32) -> bool {
         self.start <= loc && loc - self.start <= self.length
     }
 }
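
Assuming the definitions above, the Rust-facing API now round-trips through usize, while the #[cxx_name] attributes keep the C++-visible names (and their u32 signatures) unchanged. A usage sketch:

    let r = SourceRange::new(5, 3); // usize in, stored as u32
    assert_eq!(r.start(), 5);
    assert_eq!(r.end(), 8); // start + length, checked in u32
    assert!(r.contains_inclusive(8)); // one-past-the-end counts
    assert!(!r.contains_inclusive(9));
    // SourceRange::new(usize::MAX, 0) would panic in try_into().unwrap().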

fish-rust/src/parse_tree.rs

@@ -32,8 +32,8 @@ pub struct ParseToken {
     pub may_be_variable_assignment: bool,
     /// If this is a tokenizer error, that error.
     pub tok_error: TokenizerError,
-    pub source_start: SourceOffset,
-    pub source_length: SourceOffset,
+    source_start: SourceOffset,
+    source_length: SourceOffset,
 }

 impl ParseToken {
@@ -46,14 +46,26 @@ impl ParseToken {
             is_newline: false,
             may_be_variable_assignment: false,
             tok_error: TokenizerError::none,
-            source_start: SOURCE_OFFSET_INVALID,
+            source_start: SOURCE_OFFSET_INVALID.try_into().unwrap(),
             source_length: 0,
         }
     }
+    pub fn set_source_start(&mut self, value: usize) {
+        self.source_start = value.try_into().unwrap();
+    }
+    pub fn source_start(&self) -> usize {
+        self.source_start.try_into().unwrap()
+    }
+    pub fn set_source_length(&mut self, value: usize) {
+        self.source_length = value.try_into().unwrap();
+    }
+    pub fn source_length(&self) -> usize {
+        self.source_length.try_into().unwrap()
+    }
     /// \return the source range.
     /// Note the start may be invalid.
     pub fn range(&self) -> SourceRange {
-        SourceRange::new(self.source_start, self.source_length)
+        SourceRange::new(self.source_start(), self.source_length())
     }
     /// \return whether we are a string with the dash prefix set.
     pub fn is_dash_prefix_string(&self) -> bool {

fish-rust/src/tokenizer.rs

@@ -269,7 +269,7 @@ impl Tok {
         Tok {
             offset: 0,
             length: 0,
-            error_offset_within_token: SOURCE_OFFSET_INVALID,
+            error_offset_within_token: SOURCE_OFFSET_INVALID.try_into().unwrap(),
             error_length: 0,
             error: TokenizerError::none,
             type_: r#type,
@@ -285,6 +285,30 @@ impl Tok {
     fn get_source_ffi(self: &Tok, str: &CxxWString) -> UniquePtr<CxxWString> {
         self.get_source(str.as_wstr()).to_ffi()
     }
+    pub fn set_offset(&mut self, value: usize) {
+        self.offset = value.try_into().unwrap();
+    }
+    pub fn offset(&self) -> usize {
+        self.offset.try_into().unwrap()
+    }
+    pub fn length(&self) -> usize {
+        self.length.try_into().unwrap()
+    }
+    pub fn set_length(&mut self, value: usize) {
+        self.length = value.try_into().unwrap();
+    }
+    pub fn set_error_offset_within_token(&mut self, value: usize) {
+        self.error_offset_within_token = value.try_into().unwrap();
+    }
+    pub fn error_offset_within_token(&self) -> usize {
+        self.error_offset_within_token.try_into().unwrap()
+    }
+    pub fn error_length(&self) -> usize {
+        self.error_length.try_into().unwrap()
+    }
+    pub fn set_error_length(&mut self, value: usize) {
+        self.error_length = value.try_into().unwrap();
+    }
 }

 /// The tokenizer struct.
@@ -818,8 +842,8 @@ impl Tokenizer {
         }

         let mut result = Tok::new(TokenType::string);
-        result.offset = buff_start as u32;
-        result.length = (self.token_cursor - buff_start) as u32;
+        result.set_offset(buff_start);
+        result.set_length(self.token_cursor - buff_start);
         result
     }
 }
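
With these accessors, call sites like the one above no longer cast; the usize-to-u32 conversion, and its overflow panic, now live in one place. A usage sketch, assuming the definitions above:

    let mut tok = Tok::new(TokenType::string);
    tok.set_offset(42); // stored internally as u32
    tok.set_length(7);
    assert_eq!(tok.offset(), 42);
    assert_eq!(tok.length(), 7);
    // A value above u32::MAX would panic in try_into().unwrap() rather than
    // silently truncate, as the old `as u32` casts would have.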
} }